背景:之前生成 PDF 一直是通过 iTextSharp.text.pdf 直接把 HTML 代码转成 PDF,但 HTML 样式稍微复杂一点就会错乱,比如 input 标签不会显示,边框也不会显示,只有 table 这类简单元素能正常显示,所以需要换一种办法。
一开始先写了个窗体程序做尝试,思路是用 WebBrowser 打开 HTML 后截图,再把图片转成 PDF。注意 HTML 需要设置好宽高,否则 WebBrowser 会以很小的窗口打开,截图会不全。
但后来想迁移到 Web 项目时发现,Web 项目默认无法引用 System.Windows.Forms,因而无法使用 WebBrowser。我的做法是直接把这个 dll 文件拷到 Web 项目里并手动添加引用,这样就可以使用 WebBrowser 了;不过一开始运行时会报错,参考网上的做法,把 WebBrowser 放到单独的 STA 线程中运行并做一些设置后就正常了。
using System;
using System.Collections.Generic;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;
using iTextSharp.text;
using iTextSharp.text.pdf;

namespace ISPR.Common
{
    /// <summary>
    /// Converts HTML to PDF, either directly through iTextSharp's XMLWorker
    /// (<see cref="ConvertToPDF"/>) or by rendering the HTML in a WinForms
    /// <see cref="WebBrowser"/>, capturing it as a bitmap and embedding that bitmap
    /// in a single-page PDF (<see cref="ConvertImgToPDF"/>).
    /// </summary>
    public class PDFConvert
    {
        /// <summary>
        /// Browser used by the most recently started image-based conversion.
        /// Set while the conversion is running and reset to null when it finishes.
        /// </summary>
        public WebBrowser webBrowser = null;

        /// <summary>Output path of the most recently started image-based conversion.</summary>
        public string PDFPath = "";

        /// <summary>
        /// Renders HTML with special styling (for example input tags) to an image and
        /// then wraps that image in a PDF. The HTML should declare its own width and
        /// height; otherwise the WebBrowser may report a very small size and the
        /// capture will be incomplete.
        /// The work runs on a separate STA thread and this method returns immediately,
        /// so the PDF file may not exist yet when the call returns.
        /// </summary>
        /// <param name="html">HTML markup to render.</param>
        /// <param name="pdfFilePath">Path of the PDF file to create (overwritten if it exists).</param>
        /// <exception cref="ArgumentException"><paramref name="pdfFilePath"/> is null or empty.</exception>
        public void ConvertImgToPDF(String html, String pdfFilePath)
        {
            // Fail on the calling thread instead of on the worker thread.
            if (string.IsNullOrEmpty(pdfFilePath))
            {
                throw new ArgumentException("The PDF output path must not be null or empty.", "pdfFilePath");
            }

            var t = new Thread(() =>
            {
                // The public fields are still assigned for callers that read them, but the
                // worker itself only uses the captured locals so that the handler clearing
                // or another conversion overwriting the fields cannot break this run.
                PDFPath = pdfFilePath;
                var browser = new WebBrowser();
                webBrowser = browser;

                WebBrowserDocumentCompletedEventHandler onCompleted =
                    (sender, e) => RenderBrowserToPdf(browser, pdfFilePath);

                try
                {
                    // Prevent script errors from opening modal dialogs.
                    browser.ScriptErrorsSuppressed = true;
                    // Scroll bars would otherwise be captured in the image.
                    browser.ScrollBarsEnabled = false;
                    // Subscribe before starting the load so the completion event cannot be missed.
                    browser.DocumentCompleted += onCompleted;
                    browser.DocumentText = html;

                    // Pump messages until loading finishes; without DoEvents the
                    // DocumentCompleted event may never be raised on this thread.
                    while (browser.ReadyState != System.Windows.Forms.WebBrowserReadyState.Complete)
                    {
                        System.Windows.Forms.Application.DoEvents();
                    }
                }
                finally
                {
                    browser.DocumentCompleted -= onCompleted;
                    browser.Dispose();
                    // Only clear the field if it still refers to this run's browser.
                    if (ReferenceEquals(webBrowser, browser))
                    {
                        webBrowser = null;
                    }
                }
            });

            // The WebBrowser ActiveX control requires a single-threaded apartment.
            t.SetApartmentState(ApartmentState.STA);
            t.Start();
        }

        /// <summary>
        /// DocumentCompleted handler: captures the loaded page and writes it to
        /// <see cref="PDFPath"/>. Uses the sending browser when available and falls
        /// back to the <see cref="webBrowser"/> field otherwise.
        /// </summary>
        /// <param name="sender">The browser that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        public void webBrowser_DocumentCompleted(object sender, EventArgs e)
        {
            WebBrowser browser = sender as WebBrowser;
            if (browser == null)
            {
                browser = webBrowser;
            }

            RenderBrowserToPdf(browser, PDFPath);
        }

        /// <summary>
        /// Resizes the browser to the full scroll size of the document body, draws it
        /// into a bitmap and writes the resulting single-page PDF to <paramref name="pdfPath"/>.
        /// Does nothing when the document has no body or reports an empty size.
        /// </summary>
        private void RenderBrowserToPdf(WebBrowser browser, string pdfPath)
        {
            if (browser == null || browser.Document == null || browser.Document.Body == null)
            {
                return;
            }

            // Size of the laid-out HTML content.
            System.Drawing.Rectangle contentBounds = browser.Document.Body.ScrollRectangle;
            int width = contentBounds.Width;
            int height = contentBounds.Height;
            if (width <= 0 || height <= 0)
            {
                // A zero-sized Bitmap cannot be constructed.
                return;
            }

            // Make the visible area as large as the content so that everything is drawn.
            // The HTML/CSS may need explicit width/height, otherwise these values can be tiny.
            browser.Width = width;
            browser.Height = height;

            using (Bitmap bitmap = new System.Drawing.Bitmap(width, height))
            {
                browser.DrawToBitmap(bitmap, new System.Drawing.Rectangle(0, 0, width, height));
                byte[] pdfBytes = CreatePDF(bitmap, width, height);
                // WriteAllBytes creates the file or overwrites an existing one.
                File.WriteAllBytes(pdfPath, pdfBytes);
            }
        }

        /// <summary>
        /// Builds a single-page PDF whose page size is <paramref name="width"/> x
        /// <paramref name="height"/> and which contains <paramref name="bitmap"/> as a PNG image.
        /// </summary>
        /// <param name="bitmap">Image to embed.</param>
        /// <param name="width">Page width.</param>
        /// <param name="height">Page height.</param>
        /// <returns>The bytes of the generated PDF.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="bitmap"/> is null.</exception>
        public byte[] CreatePDF(Bitmap bitmap, int width, int height)
        {
            if (bitmap == null)
            {
                throw new ArgumentNullException("bitmap");
            }

            using (MemoryStream ms = new MemoryStream())
            {
                // Margins: left, right, top, bottom.
                Document doc = new Document(new iTextSharp.text.Rectangle(0, 0, width, height), 3, 3, 3, 3);
                PdfWriter writer = PdfWriter.GetInstance(doc, ms);
                // Keep the stream open after doc.Close() so it can be read below.
                writer.CloseStream = false;
                doc.Open();

                iTextSharp.text.Image img = iTextSharp.text.Image.GetInstance(bitmap, System.Drawing.Imaging.ImageFormat.Png);
                img.ScalePercent(100); // scale factor
                doc.Add(img);
                doc.Close();

                return ms.ToArray();
            }
        }

        /// <summary>
        /// Converts HTML directly to PDF with iTextSharp's XMLWorker. Complex styling
        /// and tags such as input are not rendered faithfully by this path.
        /// </summary>
        /// <param name="html">HTML markup to convert; it is encoded as UTF-8 before parsing.</param>
        /// <param name="pdfFilePath">Path of the PDF file to write.</param>
        /// <param name="rotate">When true, the page size is set to rotated A4.</param>
        public void ConvertToPDF(String html, String pdfFilePath, bool rotate = false)
        {
            // Holds the finished PDF.
            Byte[] bytes;

            using (var ms = new MemoryStream())
            {
                // The Document is an abstraction of a PDF, not the PDF itself.
                using (var doc = new Document())
                {
                    if (rotate)
                    {
                        doc.SetPageSize(iTextSharp.text.PageSize.A4.Rotate());
                    }

                    // The writer binds the document abstraction to the output stream.
                    using (var writer = PdfWriter.GetInstance(doc, ms))
                    {
                        doc.Open();

                        // Feed the HTML to XMLWorker as a UTF-8 byte stream, with a font
                        // factory that supplies a Unicode-capable font.
                        byte[] data = Encoding.UTF8.GetBytes(html);
                        using (MemoryStream msInput = new MemoryStream(data))
                        {
                            iTextSharp.tool.xml.XMLWorkerHelper.GetInstance().ParseXHtml(writer, doc, msInput, null, Encoding.UTF8, new UnicodeFontFactory());
                        }

                        doc.Close();
                    }
                }

                // Grab the bytes after the PDF objects are closed but before the
                // MemoryStream itself is disposed.
                bytes = ms.ToArray();
            }

            System.IO.File.WriteAllBytes(pdfFilePath, bytes);
        }
    }

    /// <summary>
    /// Font factory that always returns the Arial Unicode MS font loaded from
    /// "Template/Font/arialuni.ttf" under the application base directory, regardless
    /// of the font name requested.
    /// </summary>
    public class UnicodeFontFactory : FontFactoryImp
    {
        // Arial Unicode MS is a complete Unicode font.
        private static readonly string arialFontPath = Path.Combine(System.AppDomain.CurrentDomain.BaseDirectory, "Template/Font/arialuni.ttf");

        // DFKai-SB (KAIU.TTF) from the system fonts folder; an alternative to Arial Unicode MS.
        private static readonly string bkt_Path = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.Fonts), "KAIU.TTF");

        /// <summary>
        /// Returns a font based on the embedded Arial Unicode MS base font with the
        /// requested size, style and color. The <paramref name="fontname"/>,
        /// <paramref name="encoding"/>, <paramref name="embedded"/> and
        /// <paramref name="cached"/> arguments are ignored.
        /// </summary>
        public override iTextSharp.text.Font GetFont(string fontname, string encoding, bool embedded, float size, int style, BaseColor color, bool cached)
        {
            // Either Arial Unicode MS or DFKai-SB could be used here; Arial Unicode MS is chosen.
            BaseFont baseFont = BaseFont.CreateFont(arialFontPath, BaseFont.IDENTITY_H, BaseFont.EMBEDDED);
            return new iTextSharp.text.Font(baseFont, size, style, color);
        }
    }
}