Python 自动化办公——PDF 操作

一、PDF 基本操作

from PyPDF2 import PdfFileReader, PdfFileWriter


def extract_information(pdf_path):
    """Return the document-information object of a PDF file.

    Args:
        pdf_path: Path of the PDF file to inspect.

    Returns:
        Whatever ``PdfFileReader.getDocumentInfo()`` returns for the file.
    """
    # The mode has to be the string 'rb'; a bare ``rb`` is an undefined name.
    with open(pdf_path, 'rb') as fp:
        pdf = PdfFileReader(fp)
        information = pdf.getDocumentInfo()

    return information


def rotate_page(pdf_path, output='rotate_pdf1.pdf'):
    """Rotate the first page of a PDF by 90 degrees and save it on its own.

    Only the first page is written to the output file; the remaining pages
    of the input are not copied.

    Args:
        pdf_path: Path of the PDF file whose first page is rotated.
        output: Path of the PDF file to create. Defaults to
            ``'rotate_pdf1.pdf'``, the file name this function has always
            written to.
    """
    pdf_reader = PdfFileReader(pdf_path)
    page1 = pdf_reader.getPage(0)
    # Rotate 90 degrees counter-clockwise; for a clockwise rotation use
    # ``page1.rotateClockwise(90)`` instead.
    page1 = page1.rotateCounterClockwise(90)
    pdf_writer = PdfFileWriter()
    pdf_writer.addPage(page1)

    with open(output, 'wb') as fp:
        pdf_writer.write(fp)


# Merge operation
def merge_pdfs(paths, output):
    """Concatenate several PDF files into a single PDF.

    Pages are appended in the order the files appear in ``paths`` and, within
    each file, in page order.

    Args:
        paths: Iterable of paths of the PDF files to merge.
        output: Path of the merged PDF file to create.
    """
    pdf_writer = PdfFileWriter()

    for path in paths:
        pdf_reader = PdfFileReader(path)
        page_number = pdf_reader.getNumPages()
        for page in range(page_number):
            pdf_writer.addPage(pdf_reader.getPage(page))

    # Binary write mode must be the string 'wb'.
    with open(output, 'wb') as fp:
        pdf_writer.write(fp)

# Split operation
def split_pdf(path, name_of_split):
    """Split a PDF into one single-page PDF file per page.

    Each page is written to ``{name_of_split}_{page}.pdf`` where ``page`` is
    the zero-based page index, and the name of every file is printed as it
    is produced.

    Args:
        path: Path of the PDF file to split.
        name_of_split: Prefix used for the names of the generated files.
    """
    pdf = PdfFileReader(path)

    page_num = pdf.getNumPages()
    for page in range(page_num):
        # A fresh writer per page so every output file holds exactly one page.
        pdf_writer = PdfFileWriter()
        pdf_writer.addPage(pdf.getPage(page))

        out_put = f'{name_of_split}_{page}.pdf'
        print(out_put)
        with open(out_put, 'wb') as fp:
            pdf_writer.write(fp)

if __name__ == '__main__':
    # Usage examples for the other helpers. Raw strings keep the Windows
    # backslashes literal (a plain '\1' would be read as an octal escape).
    # information = extract_information(r'C:\Users\dongys_z\1.pdf')
    # print(information)

    # rotate_page(r'C:\Users\dongys_z\1.pdf')

    # merge_pdfs([r'C:\Users\dongys_z\1.pdf',
    #             r'C:\Users\dongys_z\简历.pdf'], 'total.pdf')

    # Split total.pdf into split_0.pdf, split_1.pdf, ...
    split_pdf('total.pdf', 'split')

二、PDF 内容读取

import pdfplumber as pb
from PyPDF2 import PdfFileReader, PdfFileWriter

def transform_txt(path, output):
    """Extract the text of every page of a PDF into a UTF-8 text file.

    The text of the pages is written back to back, in page order, with no
    separator added between pages.

    Args:
        path: Path of the PDF file to read.
        output: Path of the text file to create.
    """
    # Context managers close both the PDF and the text file even if a page
    # fails to extract.
    with pb.open(path) as pdf, open(output, 'w', encoding='utf-8') as fp:
        for page in pdf.pages:
            text = page.extract_text()
            # extract_text() can yield None for a page without text;
            # writing None would raise TypeError, so such pages are skipped.
            if text:
                fp.write(text)

def create_watermak(input_path, output_path, watermark):
    """Merge a watermark page into every page of a PDF.

    The first page of ``watermark`` is merged into each page of
    ``input_path`` and the result is written to ``output_path``.

    Args:
        input_path: Path of the PDF file to watermark.
        output_path: Path of the watermarked PDF file to create.
        watermark: Path of the PDF file whose first page is the watermark.
    """
    watermark_obj = PdfFileReader(watermark)
    watermark_page = watermark_obj.getPage(0)

    pdf_reader = PdfFileReader(input_path)
    pdf_writer = PdfFileWriter()

    for i in range(pdf_reader.getNumPages()):
        page = pdf_reader.getPage(i)
        page.mergePage(watermark_page)
        page.compressContentStreams()
        pdf_writer.addPage(page)

    # Binary write mode must be the string 'wb'.
    with open(output_path, 'wb') as fp:
        pdf_writer.write(fp)


if __name__ == '__main__':
    # transform_txt('total.pdf', 'total.txt')

    # Arguments are (input_path, output_path, watermark).
    # NOTE(review): the file names suggest 'QFmarkerpdf.pdf' is the watermark
    # and 'merge_pdf.pdf' the intended output, i.e. the last two arguments
    # may be swapped. The original order is kept here; confirm before running.
    create_watermak('total.pdf', 'QFmarkerpdf.pdf', 'merge_pdf.pdf')

 

Python自动化办公--pdf操作

上一篇:leetcode(94)_1247_medium_交换字符使得字符串相同_python


下一篇:Chrome 94 地址栏输入卡顿临时解决方法