在iTextSharp中将10,000页后的PDF文件保存得很慢

问题描述 投票:-1回答:1

我正在尝试构建一个创建PDF并保存在本地文件中的应用程序。我在C#中通过iTextSharp使用它。我需要保存至少10万个PDF文件。

在10 000个文件之后,保存变慢。第一个10k在2分钟内保存,其余文件大约需要5个小时。

我认为问题在于使用内存。但我找不到解决方案来解决这个问题。我已经发布了源代码。

private void CreatePDF11()
{
    Queue<String> QTempFileNames;
    QTempFileNames = new Queue<string>();
    string fileName = string.Empty;

    DateTime fileCreationDatetime = DateTime.Now;

    fileName = @"D:\StatementMassPrint\tesystw14j414435.PDF";

    string DestinationDirectory = @"D:\StatementMassPrint";

    string StrTempFilePath = String.Empty;

    string pdfPath = fileName;

    int TempPdfFileName = 1;
    if (DestinationDirectory != string.Empty)
    {
        StrTempFilePath = DestinationDirectory + "temp" + "(" + DateTime.Now.ToFileTime() + ")";
    }

    if (!(Directory.Exists(StrTempFilePath)))
    {
        Directory.CreateDirectory(StrTempFilePath);
    }
        string reportFileName = StrTempFilePath + "\\" + TempPdfFileName.ToString() + ".Pdf";

        for (int f = 0; f < 100000; f++)
        {
            reportFileName = StrTempFilePath + "\\" + TempPdfFileName.ToString() + ".Pdf";

            using (FileStream msReport = new FileStream(reportFileName, FileMode.Create))
    {
        //step 1
        using (Document pdfDoc = new Document(PageSize.A5.Rotate(), 10f, 10f, 200f,40f))
        {
            try
            {
                // step 2
                PdfWriter pdfWriter = PdfWriter.GetInstance(pdfDoc, msReport);
                pdfWriter.PageEvent = new EpisodePageHeaderAndFooter();

                //open the stream 
                pdfDoc.Open();

                DataTable dtEpisodeWise = new DataTable();
                dtEpisodeWise.Columns.Add("INVOICE_NO");
                dtEpisodeWise.Columns.Add("INVOICE_DATE");
                dtEpisodeWise.Columns.Add("CODE");
                dtEpisodeWise.Columns.Add("SERVICE_DESCRIPTION");
                dtEpisodeWise.Columns.Add("QTY",typeof(decimal));
                dtEpisodeWise.Columns.Add("UNIT_PRICE",typeof(decimal));
                dtEpisodeWise.Columns.Add("GROSS",typeof(decimal));
                dtEpisodeWise.Columns.Add("DISCOUNT",typeof(decimal));
                dtEpisodeWise.Columns.Add("NET",typeof(decimal));
                dtEpisodeWise.Columns.Add("DEDUCTION",typeof(decimal));
                dtEpisodeWise.Columns.Add("NET_PAYABLE_WITHOUT_VAT",typeof(decimal));
                dtEpisodeWise.Columns.Add("VAT",typeof(decimal));
                dtEpisodeWise.Columns.Add("NET_PAYABLE_WITH_VAT",typeof(decimal));

                PdfPTable table = new PdfPTable(dtEpisodeWise.Columns.Count);
                table.WidthPercentage = 100;

                Font fontH1 = new Font(Font.FontFamily.HELVETICA, 6, Font.BOLDITALIC);
                for (int k = 0; k < dtEpisodeWise.Columns.Count; k++)
                {

                    string str = System.Globalization.CultureInfo.CurrentCulture.TextInfo.ToTitleCase(dtEpisodeWise.Columns[k].ColumnName.Replace("_", " ").ToLower()); ;
                    PdfPCell cell = new PdfPCell(new Phrase(str,fontH1));
                    cell.HorizontalAlignment = PdfPCell.ALIGN_CENTER;
                    cell.VerticalAlignment = PdfPCell.ALIGN_CENTER;
                    //    cell.BackgroundColor = new iTextSharp.text.BaseColor(51, 102, 102);

                    table.AddCell(cell);
                }

             //   for (int i = 0; i < 1000; i++)
             //   {
                    dtEpisodeWise.Rows.Add("CR100005", "25-05-1989", "CPT004", "SERVICE005", 1, 10, 100, 10, 90, 45, 45, 5, 50);
                    dtEpisodeWise.Rows.Add("CR100006", "25-05-1992", "CPT00555", "SERVICE105",6, 60, 600, 60, 450, 45, 45, 5, 500);

             //   }

                Font fontH2 = new Font(Font.FontFamily.HELVETICA, 6, Font.ITALIC);

                for (int i = 0; i < dtEpisodeWise.Rows.Count; i++)
                {
                    for (int j = 0; j < dtEpisodeWise.Columns.Count; j++)
                    {

                        PdfPCell cell = new PdfPCell(new Phrase(dtEpisodeWise.Rows[i][j].ToString(),fontH2));

                        //Align the cell in the center

                        if (dtEpisodeWise.Columns[j].DataType == typeof(decimal))
                        {
                            cell.HorizontalAlignment = PdfPCell.ALIGN_RIGHT;
                            cell.VerticalAlignment = PdfPCell.ALIGN_CENTER;
                        }
                        else
                        {
                            cell.HorizontalAlignment = PdfPCell.ALIGN_LEFT;
                            cell.VerticalAlignment = PdfPCell.ALIGN_CENTER;
                        }

                        table.HeaderRows = 1;

                        table.AddCell(cell);
                    }
                }
                    pdfDoc.Add(table);

                pdfDoc.Close();
                TempPdfFileName++;



            }
            catch (Exception ex)
            {
                //handle exception
            }
            finally
            {

            }
        }
    }
}
}
c# wpf itext
1个回答
2
投票

好吧,有一些你还没有处理掉的东西,这可能会导致问题。其中一个,DataTable实施IDisposablePdfWriter也是如此。

对于PdfWriter,你真的不需要它作为变量声明,因为你还没有真正使用它,所以你可以像处理它一样

using (PdfWriter.GetInstance(pdfDoc, msReport))
{
    // ...
}

在需要注意的事情上,您是否正在for循环中随时重新创建字体fontH1fontH2。我没有真正看到它的需要,它们在程序的整个运行过程中都没有改变,所以你可以将它们作为局部变量声明在循环之外,或者作为创建pdf的类中静态字段的一部分。

另一件事是你似乎一遍又一遍地重新创建相同的数据表,现在我可以假设对于你当前的代码,这只是虚拟代码,但从我看到的,你没有必要在里面创建数据表你的循环,我宁愿认为它可能是创建你的pdf文件的参数(就像文件名一样),所以你可以重写你的代码来在自己的类中创建pdf(我把它命名为PdfModule但是我敢打赌它有很多更好的名称:)),并重组您的代码,以处理当时的1个文件,例如:

public class PdfModule
{
    private static readonly Font H1Font = new Font(Font.FontFamily.HELVETICA, 6, Font.BOLDITALIC);
    private static readonly Font H2Font = new Font(Font.FontFamily.HELVETICA, 6, Font.ITALIC);

    public static void CreateFile(string filename, DataTable data)
    {
        using (var msReport = new FileStream(filename, FileMode.Create, FileAccess.Write))
        {
            using (var pdfDoc = new Document(PageSize.A5.Rotate(), 10f, 10f, 200f, 40f))
            {
                using (PdfWriter.GetInstance(pdfDoc, msReport))
                {
                    pdfDoc.Open();


                    var table = new PdfPTable(data.Columns.Count)
                    {
                        WidthPercentage = 100,
                        HeaderRows = 1
                    };
                    for (var k = 0; k < data.Columns.Count; k++)
                    {
                        var str =
                            System.Globalization.CultureInfo.CurrentCulture.TextInfo.ToTitleCase(data
                                .Columns[k].ColumnName.Replace("_", " ").ToLower());
                        ;
                        var cell = new PdfPCell(new Phrase(str, H1Font))
                        {
                            HorizontalAlignment = Element.ALIGN_CENTER,
                            VerticalAlignment = Element.ALIGN_CENTER
                        };

                        table.AddCell(cell);
                    }

                    for (var i = 0; i < data.Rows.Count; i++)
                    {
                        for (var j = 0; j < data.Columns.Count; j++)
                        {
                            var cell =
                                new PdfPCell(new Phrase(data.Rows[i][j].ToString(), H2Font))
                                {
                                    VerticalAlignment = Element.ALIGN_CENTER,
                                    HorizontalAlignment = data.Columns[j].DataType == typeof(decimal)
                                        ? Element.ALIGN_RIGHT
                                        : Element.ALIGN_LEFT
                                };

                            table.AddCell(cell);
                        }
                    }

                    pdfDoc.Add(table);

                    pdfDoc.Close();
                }
            }
        }
   }
}

这会将字体声明保持为类级别的静态字段,因此在程序运行期间仅将其初始化一次,并编写1个单个文件,其中1个单独的DataTable构建文档。两者都作为参数发送。

为了使用这个类,我编写了以下示例代码,它似乎可以非常快地生成100.000文件(没有在您提到的5小时附近)。

请注意,我没有关于如何在真实程序中处理/填充数据表的任何细节,但至少它会为您提供有关如何重构代码的基本知识,并从那里开始

internal class Program
{
    private static DataTable CreateDataTable( IEnumerable<object[]> rawData )
    {
        var datatable = new DataTable();
        datatable.Columns.Add("INVOICE_NO");
        datatable.Columns.Add("INVOICE_DATE");
        datatable.Columns.Add("CODE");
        datatable.Columns.Add("SERVICE_DESCRIPTION");
        datatable.Columns.Add("QTY", typeof(decimal));
        datatable.Columns.Add("UNIT_PRICE", typeof(decimal));
        datatable.Columns.Add("GROSS", typeof(decimal));
        datatable.Columns.Add("DISCOUNT", typeof(decimal));
        datatable.Columns.Add("NET", typeof(decimal));
        datatable.Columns.Add("DEDUCTION", typeof(decimal));
        datatable.Columns.Add("NET_PAYABLE_WITHOUT_VAT", typeof(decimal));
        datatable.Columns.Add("VAT", typeof(decimal));
        datatable.Columns.Add("NET_PAYABLE_WITH_VAT", typeof(decimal));


        foreach (var row in rawData)
        {
            datatable.Rows.Add(row);
        }

        return datatable;
    }

    public static void Main(string[] args)
    {
        var rowData = new List<object[]>()
        {
            new object[] { "CR100005", "25-05-1989", "CPT004", "SERVICE005", 1, 10, 100, 10, 90,
                45, 45, 5, 50 },
            new object[] { "CR100006", "25-05-1992", "CPT00555", "SERVICE105", 6, 60, 600, 60,
            450, 45, 45, 5, 500 }
        };

        var pdfModule = new PdfModule();
        var outDirectory = Path.Combine(Environment.CurrentDirectory, "Output");
        if (!Directory.Exists(outDirectory))
        {
            // well theoretically I should just create the directory and worry about conflicts differently
            Directory.CreateDirectory(outDirectory);
        }

        Console.WriteLine( $"Creating files to {outDirectory}");
        var nrOfFiles = 100000;
        var stepCount = 1000;
        for (var i = 0; i < nrOfFiles; i++)
        {
            if (i % stepCount == 0)
            {
                Console.WriteLine($"Creating files {i}-{i+stepCount-1}" );
            }
            var filename = Path.Combine(outDirectory, $"{i}.pdf");
            using (var dataTable = CreateDataTable(rowData))
            {
                pdfModule.CreateFile(filename, dataTable);
            }
        }
        Console.WriteLine($"Done, created {nrOfFiles} files");
    }
}

对于其他人,我不确定我的环境是否与您的环境相匹配,但它始终如一地运行(通过ITextSharp的通知,我没有有效的AGPL许可证^ _ ^)我在Linux下使用Rider IDE运行它

© www.soinside.com 2019 - 2024. All rights reserved.