一、PDF 基本操作
from PyPDF2 import PdfFileReader, PdfFileWriter


def extract_information(pdf_path):
    """Return the document-information object of a PDF file.

    Args:
        pdf_path: Path of the PDF file to inspect.

    Returns:
        The value produced by ``PdfFileReader.getDocumentInfo()``.
    """
    with open(pdf_path, 'rb') as fp:
        pdf = PdfFileReader(fp)
        information = pdf.getDocumentInfo()
    return information


def rotate_page(pdf_path, output='rotate_pdf1.pdf'):
    """Rotate the first page by 90 degrees counter-clockwise and save it.

    Only page 0 of the input is written to the output file.

    Args:
        pdf_path: Path of the source PDF.
        output: Path of the PDF to write. Defaults to ``'rotate_pdf1.pdf'``.
    """
    pdf_reader = PdfFileReader(pdf_path)
    page1 = pdf_reader.getPage(0)
    # Counter-clockwise by 90 degrees; use rotateClockwise(90) for the
    # opposite direction.
    page1 = page1.rotateCounterClockwise(90)
    pdf_writer = PdfFileWriter()
    pdf_writer.addPage(page1)
    with open(output, 'wb') as fp:
        pdf_writer.write(fp)


def merge_pdfs(paths, output):
    """Concatenate every page of several PDFs into a single file.

    Args:
        paths: Iterable of input PDF paths, appended in iteration order.
        output: Path of the merged PDF to write.
    """
    pdf_writer = PdfFileWriter()
    for path in paths:
        pdf_reader = PdfFileReader(path)
        for page in range(pdf_reader.getNumPages()):
            pdf_writer.addPage(pdf_reader.getPage(page))
    with open(output, 'wb') as fp:
        pdf_writer.write(fp)


def split_pdf(path, name_of_split):
    """Write each page of a PDF to its own single-page file.

    Output files are named ``{name_of_split}_{page}.pdf`` where ``page`` is
    the zero-based page index; each name is printed before it is written.

    Args:
        path: Path of the PDF to split.
        name_of_split: Prefix used for the generated file names.
    """
    pdf = PdfFileReader(path)
    for page in range(pdf.getNumPages()):
        # A fresh writer per page so every output holds exactly one page.
        pdf_writer = PdfFileWriter()
        pdf_writer.addPage(pdf.getPage(page))
        out_put = f'{name_of_split}_{page}.pdf'
        print(out_put)
        with open(out_put, 'wb') as fp:
            pdf_writer.write(fp)


if __name__ == '__main__':
    # Usage examples. Raw strings keep the Windows backslashes literal
    # (a plain '\1' would be read as an octal escape).
    # information = extract_information(r'C:\Users\dongys_z\1.pdf')
    # print(information)
    # rotate_page(r'C:\Users\dongys_z\1.pdf')
    # merge_pdfs([r'C:\Users\dongys_z\1.pdf',
    #             r'C:\Users\dongys_z\简历.pdf'], 'total.pdf')
    split_pdf('total.pdf', 'split')
二、PDF 内容读取
import pdfplumber as pb
from PyPDF2 import PdfFileReader, PdfFileWriter


def transform_txt(path, output):
    """Extract the text of every page of a PDF into a UTF-8 text file.

    Page texts are written back to back, with no separator added between
    pages.

    Args:
        path: Path of the PDF to read.
        output: Path of the text file to write.
    """
    # Context managers close both the PDF and the output file even if text
    # extraction fails part-way through.
    with pb.open(path) as pdf, open(output, 'w', encoding='utf-8') as fp:
        for page in pdf.pages:
            # extract_text() may yield None for a page without extractable
            # text; write nothing for such pages instead of raising.
            fp.write(page.extract_text() or '')


def create_watermak(input_path, output_path, watermark):
    """Overlay the first page of a watermark PDF onto every page of a PDF.

    Args:
        input_path: Path of the PDF to be watermarked.
        output_path: Path of the watermarked PDF to write.
        watermark: Path of the PDF whose page 0 is used as the watermark.
    """
    watermark_obj = PdfFileReader(watermark)
    watermark_page = watermark_obj.getPage(0)

    pdf_reader = PdfFileReader(input_path)
    pdf_writer = PdfFileWriter()

    for i in range(pdf_reader.getNumPages()):
        page = pdf_reader.getPage(i)
        page.mergePage(watermark_page)
        page.compressContentStreams()
        pdf_writer.addPage(page)

    with open(output_path, 'wb') as fp:
        pdf_writer.write(fp)


if __name__ == '__main__':
    # transform_txt('total.pdf', 'total.txt')
    create_watermak('total.pdf', 'QFmarkerpdf.pdf', 'merge_pdf.pdf')