我正在尝试构建一个创建PDF并保存在本地文件中的应用程序。我在C#中通过iTextSharp使用它。我需要保存至少10万个PDF文件。
在10 000个文件之后,保存变慢。第一个10k在2分钟内保存,其余文件大约需要5个小时。
我认为问题在于使用内存。但我找不到解决方案来解决这个问题。我已经发布了源代码。
private void CreatePDF11()
{
Queue<String> QTempFileNames;
QTempFileNames = new Queue<string>();
string fileName = string.Empty;
DateTime fileCreationDatetime = DateTime.Now;
fileName = @"D:\StatementMassPrint\tesystw14j414435.PDF";
string DestinationDirectory = @"D:\StatementMassPrint";
string StrTempFilePath = String.Empty;
string pdfPath = fileName;
int TempPdfFileName = 1;
if (DestinationDirectory != string.Empty)
{
StrTempFilePath = DestinationDirectory + "temp" + "(" + DateTime.Now.ToFileTime() + ")";
}
if (!(Directory.Exists(StrTempFilePath)))
{
Directory.CreateDirectory(StrTempFilePath);
}
string reportFileName = StrTempFilePath + "\\" + TempPdfFileName.ToString() + ".Pdf";
for (int f = 0; f < 100000; f++)
{
reportFileName = StrTempFilePath + "\\" + TempPdfFileName.ToString() + ".Pdf";
using (FileStream msReport = new FileStream(reportFileName, FileMode.Create))
{
//step 1
using (Document pdfDoc = new Document(PageSize.A5.Rotate(), 10f, 10f, 200f,40f))
{
try
{
// step 2
PdfWriter pdfWriter = PdfWriter.GetInstance(pdfDoc, msReport);
pdfWriter.PageEvent = new EpisodePageHeaderAndFooter();
//open the stream
pdfDoc.Open();
DataTable dtEpisodeWise = new DataTable();
dtEpisodeWise.Columns.Add("INVOICE_NO");
dtEpisodeWise.Columns.Add("INVOICE_DATE");
dtEpisodeWise.Columns.Add("CODE");
dtEpisodeWise.Columns.Add("SERVICE_DESCRIPTION");
dtEpisodeWise.Columns.Add("QTY",typeof(decimal));
dtEpisodeWise.Columns.Add("UNIT_PRICE",typeof(decimal));
dtEpisodeWise.Columns.Add("GROSS",typeof(decimal));
dtEpisodeWise.Columns.Add("DISCOUNT",typeof(decimal));
dtEpisodeWise.Columns.Add("NET",typeof(decimal));
dtEpisodeWise.Columns.Add("DEDUCTION",typeof(decimal));
dtEpisodeWise.Columns.Add("NET_PAYABLE_WITHOUT_VAT",typeof(decimal));
dtEpisodeWise.Columns.Add("VAT",typeof(decimal));
dtEpisodeWise.Columns.Add("NET_PAYABLE_WITH_VAT",typeof(decimal));
PdfPTable table = new PdfPTable(dtEpisodeWise.Columns.Count);
table.WidthPercentage = 100;
Font fontH1 = new Font(Font.FontFamily.HELVETICA, 6, Font.BOLDITALIC);
for (int k = 0; k < dtEpisodeWise.Columns.Count; k++)
{
string str = System.Globalization.CultureInfo.CurrentCulture.TextInfo.ToTitleCase(dtEpisodeWise.Columns[k].ColumnName.Replace("_", " ").ToLower()); ;
PdfPCell cell = new PdfPCell(new Phrase(str,fontH1));
cell.HorizontalAlignment = PdfPCell.ALIGN_CENTER;
cell.VerticalAlignment = PdfPCell.ALIGN_CENTER;
// cell.BackgroundColor = new iTextSharp.text.BaseColor(51, 102, 102);
table.AddCell(cell);
}
// for (int i = 0; i < 1000; i++)
// {
dtEpisodeWise.Rows.Add("CR100005", "25-05-1989", "CPT004", "SERVICE005", 1, 10, 100, 10, 90, 45, 45, 5, 50);
dtEpisodeWise.Rows.Add("CR100006", "25-05-1992", "CPT00555", "SERVICE105",6, 60, 600, 60, 450, 45, 45, 5, 500);
// }
Font fontH2 = new Font(Font.FontFamily.HELVETICA, 6, Font.ITALIC);
for (int i = 0; i < dtEpisodeWise.Rows.Count; i++)
{
for (int j = 0; j < dtEpisodeWise.Columns.Count; j++)
{
PdfPCell cell = new PdfPCell(new Phrase(dtEpisodeWise.Rows[i][j].ToString(),fontH2));
//Align the cell in the center
if (dtEpisodeWise.Columns[j].DataType == typeof(decimal))
{
cell.HorizontalAlignment = PdfPCell.ALIGN_RIGHT;
cell.VerticalAlignment = PdfPCell.ALIGN_CENTER;
}
else
{
cell.HorizontalAlignment = PdfPCell.ALIGN_LEFT;
cell.VerticalAlignment = PdfPCell.ALIGN_CENTER;
}
table.HeaderRows = 1;
table.AddCell(cell);
}
}
pdfDoc.Add(table);
pdfDoc.Close();
TempPdfFileName++;
}
catch (Exception ex)
{
//handle exception
}
finally
{
}
}
}
}
}
好吧,有一些你还没有处理掉的东西,这可能会导致问题。其中一个,DataTable
实施IDisposable
,PdfWriter
也是如此。
对于PdfWriter
,你真的不需要它作为变量声明,因为你还没有真正使用它,所以你可以像处理它一样
using (PdfWriter.GetInstance(pdfDoc, msReport))
{
// ...
}
在需要注意的事情上,您是否正在for循环中随时重新创建字体fontH1
和fontH2
。我没有真正看到它的需要,它们在程序的整个运行过程中都没有改变,所以你可以将它们作为局部变量声明在循环之外,或者作为创建pdf的类中静态字段的一部分。
另一件事是你似乎一遍又一遍地重新创建相同的数据表,现在我可以假设对于你当前的代码,这只是虚拟代码,但从我看到的,你没有必要在里面创建数据表你的循环,我宁愿认为它可能是创建你的pdf文件的参数(就像文件名一样),所以你可以重写你的代码来在自己的类中创建pdf(我把它命名为PdfModule
但是我敢打赌它有很多更好的名称:)),并重组您的代码,以处理当时的1个文件,例如:
public class PdfModule
{
private static readonly Font H1Font = new Font(Font.FontFamily.HELVETICA, 6, Font.BOLDITALIC);
private static readonly Font H2Font = new Font(Font.FontFamily.HELVETICA, 6, Font.ITALIC);
public static void CreateFile(string filename, DataTable data)
{
using (var msReport = new FileStream(filename, FileMode.Create, FileAccess.Write))
{
using (var pdfDoc = new Document(PageSize.A5.Rotate(), 10f, 10f, 200f, 40f))
{
using (PdfWriter.GetInstance(pdfDoc, msReport))
{
pdfDoc.Open();
var table = new PdfPTable(data.Columns.Count)
{
WidthPercentage = 100,
HeaderRows = 1
};
for (var k = 0; k < data.Columns.Count; k++)
{
var str =
System.Globalization.CultureInfo.CurrentCulture.TextInfo.ToTitleCase(data
.Columns[k].ColumnName.Replace("_", " ").ToLower());
;
var cell = new PdfPCell(new Phrase(str, H1Font))
{
HorizontalAlignment = Element.ALIGN_CENTER,
VerticalAlignment = Element.ALIGN_CENTER
};
table.AddCell(cell);
}
for (var i = 0; i < data.Rows.Count; i++)
{
for (var j = 0; j < data.Columns.Count; j++)
{
var cell =
new PdfPCell(new Phrase(data.Rows[i][j].ToString(), H2Font))
{
VerticalAlignment = Element.ALIGN_CENTER,
HorizontalAlignment = data.Columns[j].DataType == typeof(decimal)
? Element.ALIGN_RIGHT
: Element.ALIGN_LEFT
};
table.AddCell(cell);
}
}
pdfDoc.Add(table);
pdfDoc.Close();
}
}
}
}
}
这会将字体声明保持为类级别的静态字段,因此在程序运行期间仅将其初始化一次,并编写1个单个文件,其中1个单独的DataTable
构建文档。两者都作为参数发送。
为了使用这个类,我编写了以下示例代码,它似乎可以非常快地生成100.000文件(没有在您提到的5小时附近)。
请注意,我没有关于如何在真实程序中处理/填充数据表的任何细节,但至少它会为您提供有关如何重构代码的基本知识,并从那里开始
internal class Program
{
private static DataTable CreateDataTable( IEnumerable<object[]> rawData )
{
var datatable = new DataTable();
datatable.Columns.Add("INVOICE_NO");
datatable.Columns.Add("INVOICE_DATE");
datatable.Columns.Add("CODE");
datatable.Columns.Add("SERVICE_DESCRIPTION");
datatable.Columns.Add("QTY", typeof(decimal));
datatable.Columns.Add("UNIT_PRICE", typeof(decimal));
datatable.Columns.Add("GROSS", typeof(decimal));
datatable.Columns.Add("DISCOUNT", typeof(decimal));
datatable.Columns.Add("NET", typeof(decimal));
datatable.Columns.Add("DEDUCTION", typeof(decimal));
datatable.Columns.Add("NET_PAYABLE_WITHOUT_VAT", typeof(decimal));
datatable.Columns.Add("VAT", typeof(decimal));
datatable.Columns.Add("NET_PAYABLE_WITH_VAT", typeof(decimal));
foreach (var row in rawData)
{
datatable.Rows.Add(row);
}
return datatable;
}
public static void Main(string[] args)
{
var rowData = new List<object[]>()
{
new object[] { "CR100005", "25-05-1989", "CPT004", "SERVICE005", 1, 10, 100, 10, 90,
45, 45, 5, 50 },
new object[] { "CR100006", "25-05-1992", "CPT00555", "SERVICE105", 6, 60, 600, 60,
450, 45, 45, 5, 500 }
};
var pdfModule = new PdfModule();
var outDirectory = Path.Combine(Environment.CurrentDirectory, "Output");
if (!Directory.Exists(outDirectory))
{
// well theoretically I should just create the directory and worry about conflicts differently
Directory.CreateDirectory(outDirectory);
}
Console.WriteLine( $"Creating files to {outDirectory}");
var nrOfFiles = 100000;
var stepCount = 1000;
for (var i = 0; i < nrOfFiles; i++)
{
if (i % stepCount == 0)
{
Console.WriteLine($"Creating files {i}-{i+stepCount-1}" );
}
var filename = Path.Combine(outDirectory, $"{i}.pdf");
using (var dataTable = CreateDataTable(rowData))
{
pdfModule.CreateFile(filename, dataTable);
}
}
Console.WriteLine($"Done, created {nrOfFiles} files");
}
}
对于其他人,我不确定我的环境是否与您的环境相匹配,但它始终如一地运行(通过ITextSharp
的通知,我没有有效的AGPL许可证^ _ ^)我在Linux下使用Rider IDE运行它