PDF转图片大概有十几种方式,褒贬不一,我就详细给大家说一下我认为效率最高的方式:使用Adobe官方的SDK。
安装 Acrobat Reader 9.0 以上版本即可,安装时勾选如下组件。(注意:代码中用到的 AcroExch.PDDoc 等 COM 自动化对象通常由完整版 Adobe Acrobat 提供,如果创建对象失败,请确认安装的是完整版 Acrobat。)
代码如下:
using System;
using System.Drawing;
using System.Windows.Forms;
using System.IO;
using System.Drawing.Imaging;
using System.Runtime.InteropServices;
using FileEventWatcher.Common;

namespace FileEventWatcher
{
    /// <summary>
    /// Renders PDF pages to image files by driving the Acrobat COM automation
    /// objects ("AcroExch.PDDoc" / "AcroExch.Rect") and passing each rendered
    /// page through the Windows clipboard.
    /// </summary>
    /// <remarks>
    /// The clipboard is cleared before and after every page, so whatever the
    /// user had on the clipboard is lost.
    /// NOTE(review): System.Windows.Forms.Clipboard is documented to require an
    /// STA thread. The original author observed that adding an await between
    /// iterations "breaks" the conversion; presumably the continuation resumed
    /// on a non-STA thread - confirm before making this method asynchronous.
    /// </remarks>
    public class ConvertPDFtoImage
    {
        /// <summary>
        /// Converts a range of pages of a PDF file to images, once for every
        /// entry of <c>Const.Zoom</c>. The images for zoom index <c>z</c> are
        /// written to the directory <c>Const.path[z]</c> (created if missing)
        /// and are named with the 4-digit page number plus
        /// <c>format.ToString()</c> as extension.
        /// </summary>
        /// <param name="pdfFilePath">Path of the PDF file to convert.</param>
        /// <param name="beginPageNum">First page to convert (1-based). Values &lt;= 0 are treated as 1.</param>
        /// <param name="endPageNum">Last page to convert (1-based, inclusive). Values &lt;= 0 or beyond the page count are treated as the last page.</param>
        /// <param name="format">Image format to save in. <c>null</c> means PNG.</param>
        /// <exception cref="FileNotFoundException">Acrobat could not open <paramref name="pdfFilePath"/>.</exception>
        /// <exception cref="ArgumentException"><paramref name="beginPageNum"/> is greater than <paramref name="endPageNum"/> after normalization.</exception>
        public static void ConvertPdfToImage2(string pdfFilePath, int beginPageNum, int endPageNum, ImageFormat format)
        {
            FilePath.CreateFilePath();

            if (format == null)
            {
                format = ImageFormat.Png;
            }

            for (int z = 0; z < Const.Zoom.Length; z++)
            {
                string imageDirectoryPath = Const.path[z];

                // COM object that represents the PDF document.
                Acrobat.CAcroPDDoc pdfDoc =
                    (Acrobat.CAcroPDDoc)Microsoft.VisualBasic.Interaction.CreateObject("AcroExch.PDDoc", "");
                bool opened = false;

                try
                {
                    if (!pdfDoc.Open(pdfFilePath))
                    {
                        throw new FileNotFoundException(string.Format("源文件{0}不存在!", pdfFilePath));
                    }
                    opened = true;

                    if (!Directory.Exists(imageDirectoryPath))
                    {
                        Directory.CreateDirectory(imageDirectoryPath);
                    }

                    // Normalize the requested page range against the real page count.
                    int pageCount = pdfDoc.GetNumPages();
                    if (beginPageNum <= 0)
                    {
                        beginPageNum = 1;
                    }
                    if (endPageNum > pageCount || endPageNum <= 0)
                    {
                        endPageNum = pageCount;
                    }
                    if (beginPageNum > endPageNum)
                    {
                        throw new ArgumentException("参数\"beginPageNum\"必须小于\"endPageNum\"!");
                    }

                    // A non-positive zoom is replaced by 1 (original size). The value is
                    // written back to the shared table, as the original code did, so that
                    // other readers of Const.Zoom keep seeing the corrected value.
                    if (Const.Zoom[z] <= 0)
                    {
                        Const.Zoom[z] = 1;
                    }

                    double scale = Const.Zoom[z];
                    short zoomPercent = ClampToShort(100 * Const.Zoom[z]);

                    for (int i = beginPageNum; i <= endPageNum; i++)
                    {
                        SavePageAsImage(pdfDoc, i, scale, zoomPercent, imageDirectoryPath, format);
                    }
                }
                finally
                {
                    // Always close and release the document, even when validation or
                    // rendering throws, so Acrobat does not keep the file locked.
                    try
                    {
                        if (opened)
                        {
                            pdfDoc.Close();
                        }
                    }
                    finally
                    {
                        ReleaseCom(pdfDoc);
                    }
                }
            }
        }

        /// <summary>
        /// Renders one page to the clipboard and saves the resulting bitmap.
        /// All per-page COM objects are released before returning.
        /// </summary>
        private static void SavePageAsImage(Acrobat.CAcroPDDoc pdfDoc, int pageNumber, double scale,
            short zoomPercent, string imageDirectoryPath, ImageFormat format)
        {
            Acrobat.CAcroPDPage pdfPage = null;
            Acrobat.CAcroPoint pdfPoint = null;
            Acrobat.CAcroRect pdfRect = null;

            try
            {
                // AcquirePage is 0-based, page numbers are 1-based.
                pdfPage = (Acrobat.CAcroPDPage)pdfDoc.AcquirePage(pageNumber - 1);
                pdfPoint = (Acrobat.CAcroPoint)pdfPage.GetSize();
                pdfRect = (Acrobat.CAcroRect)Microsoft.VisualBasic.Interaction.CreateObject("AcroExch.Rect", "");

                // The clip rectangle is the page size scaled by the zoom factor. Its
                // members are 16-bit, so clamp instead of letting the cast wrap around.
                pdfRect.Left = 0;
                pdfRect.right = ClampToShort((double)pdfPoint.x * scale);
                pdfRect.Top = 0;
                pdfRect.bottom = ClampToShort((double)pdfPoint.y * scale);

                try
                {
                    Clipboard.Clear();
                    pdfPage.CopyToClipboard(pdfRect, 0, 0, zoomPercent);
                }
                catch (Exception e)
                {
                    // Best effort: report the failure and skip this page. Reading the
                    // clipboard now could pick up stale data that is not this page.
                    MessageBox.Show(e.Message.ToString());
                    return;
                }

                // GetDataObject returns null when the clipboard is empty.
                IDataObject clipboardData = Clipboard.GetDataObject();
                if (clipboardData != null && clipboardData.GetDataPresent(DataFormats.Bitmap))
                {
                    using (Bitmap pdfBitmap = (Bitmap)clipboardData.GetData(DataFormats.Bitmap))
                    {
                        if (pdfBitmap != null)
                        {
                            pdfBitmap.Save(
                                System.IO.Path.Combine(imageDirectoryPath, pageNumber.ToString("0000") + "." + format.ToString()),
                                format);
                        }
                    }
                }

                Clipboard.Clear();
            }
            finally
            {
                ReleaseCom(pdfRect);
                ReleaseCom(pdfPoint);
                ReleaseCom(pdfPage);
            }
        }

        /// <summary>
        /// Truncates <paramref name="value"/> toward zero and limits it to the range of <see cref="short"/>.
        /// </summary>
        private static short ClampToShort(double value)
        {
            if (value >= short.MaxValue)
            {
                return short.MaxValue;
            }
            if (value <= short.MinValue)
            {
                return short.MinValue;
            }
            return (short)value;
        }

        /// <summary>
        /// Releases a COM wrapper; does nothing for null or non-COM objects.
        /// </summary>
        private static void ReleaseCom(object comObject)
        {
            if (comObject != null && Marshal.IsComObject(comObject))
            {
                Marshal.ReleaseComObject(comObject);
            }
        }
    }
}
这个代码很简单,下一篇我会告诉大家如何利用反射,从PDF中抽取文字出来。