我们在网上搜集的电子版资料多数是 PDF 格式。一些无良培训机构或自媒体为了博取眼球、引流,会在倒手过程中使用程序给文档批量添加水印或联系方式,致使原本干净整洁的资料满屏“牛皮癣”,简直糟糕透了!
from flask import Flask, request, send_file, render_template_string, jsonify
from PyPDF2 import PdfReader, PdfWriter
import os
from pdf2image import convert_from_path
import io
import base64
# Flask application object; the route decorators in this module register on it.
app = Flask(__name__)
# Root URL route
@app.route('/')
def index():
    """Serve the single-page UI used to pick which PDF pages to delete.

    The page is an inline template. Its script fetches page thumbnails from
    ``/get-pages`` and posts the indices of the checked pages to
    ``/merge-pdf`` as JSON under the ``selected_pages`` key.

    Returns:
        The rendered HTML document.
    """
    return render_template_string('''
<!DOCTYPE html>
<html>
<head>
    <title>PDF Page Manager</title>
    <style>
        body {
            font-family: Arial, sans-serif;
        }
        .grid-container {
            display: grid;
            grid-template-columns: repeat(5, 1fr);
            grid-gap: 10px;
            margin-bottom: 20px;
        }
        .grid-item {
            text-align: center;
        }
        .grid-item img {
            max-width: 100%;
            height: auto;
        }
        .grid-item input[type="checkbox"] {
            margin-top: 5px;
        }
    </style>
</head>
<body>
    <h1>Select Pages to Delete</h1>
    <div id="pageContainer" class="grid-container"></div>
    <button onclick="loadPages()">Load Pages</button>
    <button onclick="submitForm()">Submit</button>
    <script>
        function loadPages() {
            fetch('/get-pages', { method: 'GET' })
                .then(response => response.json())
                .then(data => {
                    // On failure the server sends an "error" field instead of "pages".
                    if (!Array.isArray(data.pages)) {
                        throw new Error(data.error || 'Unexpected response from server.');
                    }
                    const container = document.getElementById('pageContainer');
                    container.innerHTML = ''; // 清空容器
                    data.pages.forEach((page, index) => {
                        const item = document.createElement('div');
                        item.className = 'grid-item';
                        const img = document.createElement('img');
                        img.src = `data:image/png;base64,${page.image}`;
                        img.alt = `Page ${index + 1}`;
                        const checkbox = document.createElement('input');
                        checkbox.type = 'checkbox';
                        checkbox.name = 'page';
                        checkbox.value = index;
                        // The id must match label.htmlFor so clicking the label toggles the box.
                        checkbox.id = `page${index}`;
                        const label = document.createElement('label');
                        label.htmlFor = `page${index}`;
                        label.appendChild(document.createTextNode(`Page ${index + 1}`));
                        item.appendChild(img);
                        item.appendChild(checkbox);
                        item.appendChild(label);
                        container.appendChild(item);
                    });
                })
                .catch(error => {
                    alert('Failed to load pages: ' + error.message);
                });
        }
        function submitForm() {
            const checkboxes = document.querySelectorAll('input[type=checkbox]:checked');
            const selectedPages = Array.from(checkboxes).map(checkbox => checkbox.value);
            fetch('/merge-pdf', {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json'
                },
                body: JSON.stringify({ selected_pages: selectedPages })
            }).then(response => {
                if (response.ok) {
                    alert('PDF has been modified and saved.');
                } else {
                    alert('An error occurred while modifying the PDF.');
                }
            }).catch(() => {
                // Network-level failure: the request never produced a response.
                alert('An error occurred while modifying the PDF.');
            });
        }
    </script>
</body>
</html>
''')
@app.route('/get-pages', methods=['GET'])
def get_pages():
    """Render every page of the source PDF to a base64-encoded PNG.

    Returns:
        A JSON response ``{"pages": [{"index": int, "image": str}, ...]}``
        where ``image`` is the base64 text of the page's PNG rendering.
        If anything raises, a JSON ``{"error": message}`` body is returned
        with HTTP status 500 instead.
    """
    file_path = r"D:\daku\python编辑pdf\2024年县域未成年人网络消费调研报告-佟毕铖.pdf"
    try:
        rendered_pages = convert_from_path(file_path)
        page_data = []
        for page_index, page_image in enumerate(rendered_pages):
            # Encode in memory so no temporary image files are written.
            with io.BytesIO() as png_buffer:
                page_image.save(png_buffer, format="PNG")
                png_bytes = png_buffer.getvalue()
            encoded_png = base64.b64encode(png_bytes).decode('utf-8')
            page_data.append({'index': page_index, 'image': encoded_png})
        return jsonify({'pages': page_data})
    except Exception as e:
        # Route boundary: report the failure to the client as JSON.
        return jsonify({'error': str(e)}), 500
@app.route('/merge-pdf', methods=['POST'])
def merge_pdf():
    """Write a copy of the source PDF without the selected pages.

    Expects a JSON object body with an optional ``selected_pages`` list of
    zero-based page indices to delete. Each index may be an integer or a
    numeric string (the browser sends checkbox values as strings).

    Returns:
        The modified PDF as a file attachment, or a JSON ``{"error": ...}``
        body with HTTP status 400 when the request payload is malformed.
    """
    # silent=True yields None instead of raising on a missing or invalid body.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object.'}), 400

    selected_pages = data.get('selected_pages', [])
    if not isinstance(selected_pages, list):
        return jsonify({'error': 'selected_pages must be a list.'}), 400

    try:
        # Normalise to integers so 0 and "0" both select the first page, and
        # use a set so the membership test below is O(1) per page.
        pages_to_delete = {int(page) for page in selected_pages}
    except (TypeError, ValueError):
        return jsonify({'error': 'selected_pages must contain page indices.'}), 400

    file_path = r"D:\daku\python编辑pdf\2024年县域未成年人网络消费调研报告-佟毕铖.pdf"
    reader = PdfReader(file_path)
    writer = PdfWriter()
    for page_num, page in enumerate(reader.pages):
        if page_num not in pages_to_delete:
            writer.add_page(page)

    output_path = r"D:\daku\python编辑pdf\output\modified_report.pdf"
    # Create the output directory on first use.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'wb') as f:
        writer.write(f)
    return send_file(output_path, as_attachment=True)
# Start the Flask server with debug mode on when this file is run as a script.
# NOTE(review): debug mode is meant for local development only - confirm this
# is never exposed beyond localhost.
if __name__ == '__main__':
    app.run(debug=True)
网页端代码:
<!DOCTYPE html>
<!--
  PDF Page Manager: shows one thumbnail per PDF page with a checkbox, and
  submits the indices of the checked pages so the server can delete them.
-->
<html>
<head>
    <title>PDF Page Manager</title>
    <style>
        body {
            font-family: Arial, sans-serif;
        }
        .grid-container {
            display: grid;
            grid-template-columns: repeat(5, 1fr);
            grid-gap: 10px;
            margin-bottom: 20px;
        }
        .grid-item {
            text-align: center;
        }
        .grid-item img {
            max-width: 100%;
            height: auto;
        }
        .grid-item input[type="checkbox"] {
            margin-top: 5px;
        }
    </style>
</head>
<body>
    <h1>Select Pages to Delete</h1>
    <!-- The grid-container class applies the 5-column thumbnail layout. -->
    <div id="pageContainer" class="grid-container"></div>
    <button onclick="loadPages()">Load Pages</button>
    <button onclick="submitForm()">Submit</button>
    <script>
        /**
         * Fetch the page thumbnails from /get-pages and rebuild the grid,
         * one image + checkbox + label per page.
         */
        function loadPages() {
            fetch('/get-pages', { method: 'GET' })
                .then(response => response.json())
                .then(data => {
                    // On failure the server sends an "error" field instead of "pages".
                    if (!Array.isArray(data.pages)) {
                        throw new Error(data.error || 'Unexpected response from server.');
                    }
                    const container = document.getElementById('pageContainer');
                    container.innerHTML = ''; // clear the container
                    data.pages.forEach((page, index) => {
                        const item = document.createElement('div');
                        item.className = 'grid-item';
                        const img = document.createElement('img');
                        img.src = `data:image/png;base64,${page.image}`;
                        img.alt = `Page ${index + 1}`;
                        const checkbox = document.createElement('input');
                        checkbox.type = 'checkbox';
                        checkbox.name = 'page';
                        checkbox.value = index;
                        // The id must match label.htmlFor so clicking the label toggles the box.
                        checkbox.id = `page${index}`;
                        const label = document.createElement('label');
                        label.htmlFor = `page${index}`;
                        label.appendChild(document.createTextNode(`Page ${index + 1}`));
                        item.appendChild(img);
                        item.appendChild(checkbox);
                        item.appendChild(label);
                        container.appendChild(item);
                    });
                })
                .catch(error => {
                    alert('Failed to load pages: ' + error.message);
                });
        }

        /**
         * POST the values of all checked boxes to /merge-pdf as
         * {"selected_pages": [...]} and report success or failure.
         */
        function submitForm() {
            const checkboxes = document.querySelectorAll('input[type=checkbox]:checked');
            const selectedPages = Array.from(checkboxes).map(checkbox => checkbox.value);
            fetch('/merge-pdf', {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json'
                },
                body: JSON.stringify({ selected_pages: selectedPages })
            }).then(response => {
                if (response.ok) {
                    alert('PDF has been modified and saved.');
                } else {
                    alert('An error occurred while modifying the PDF.');
                }
            }).catch(() => {
                // Network-level failure: the request never produced a response.
                alert('An error occurred while modifying the PDF.');
            });
        }
    </script>
</body>
</html>
通过 Python 读取指定路径的 PDF 文件,按页拆分后将各页面载入网页。
勾选要删除的页面下方的小方框,然后点击最下方的提交按钮保存就好啦!