整理一下这些天写的程序,主要是应用 PyQt5 实现 GUI 设计,并实现网络爬虫、图片处理、消息推送机器人、Excel 表格的读写操作等,结合业务逻辑,代码如下:
import json
import os
import time
import sys
import requests
from PyQt5 import QtCore, QtGui, QtWidgets
from PyQt5.QtCore import QThread, pyqtSignal, QMutex, QWaitCondition
import tkinter as tk
from tkinter import filedialog
import xlrd
from xlwt import *
from PyQt5.QtWidgets import QMessageBox
from bs4 import BeautifulSoup
from urllib.request import urlopen, Request
from PIL import Image
import calendar;
import ssl
# Webhook URL copied from the chat-robot settings (masked in this listing).
webhook = '********************************************'

# Prefixes prepended to outgoing messages: bid reminder / exception alert.
msg1 = "招标提醒\n"
msg2 = "异常提醒\n"

# Keyword containers; nothing in the visible code fills or reads them.
keyTitle = []
keyContent = []

# Browser-like request headers shared by the crawler requests.
headers = {
    'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36",
}

# Disable HTTPS certificate verification for every urllib request in the
# process.
# NOTE(review): this removes protection against man-in-the-middle attacks;
# confirm it is really required for the target sites.
ssl._create_default_https_context = ssl._create_unverified_context
# Crawler worker thread.
class Thread(QThread):
    """Background thread that repeatedly crawls the sites listed in a workbook.

    The workbook path and the two keyword strings are read from the global
    ``ui`` object when the thread starts. ``pause()`` / ``resume()`` park and
    release the crawl loop between two sites.
    """

    def __init__(self):
        super(Thread, self).__init__()
        self.isPause = False
        self.mutex = QMutex()
        self.cond = QWaitCondition()

    def pause(self):
        """Ask the crawl loop to stop before it handles the next site."""
        print("线程休眠!")
        # The flag is changed under the mutex so the worker cannot miss the
        # update between testing the flag and going to sleep.
        self.mutex.lock()
        try:
            self.isPause = True
        finally:
            self.mutex.unlock()

    def resume(self):
        """Let a paused crawl loop continue."""
        print("线程启动!")
        self.mutex.lock()
        try:
            self.isPause = False
            self.cond.wakeAll()
        finally:
            self.mutex.unlock()

    def _wait_while_paused(self):
        """Block the calling (worker) thread for as long as the pause flag is set."""
        self.mutex.lock()
        try:
            # Re-test the flag after every wake-up instead of assuming that a
            # wake-up always means "resumed".
            while self.isPause:
                self.cond.wait(self.mutex)
        finally:
            self.mutex.unlock()

    def run(self):
        """Crawl every listed site in an endless loop, honouring pause requests."""
        filePath = ui.lineEdit.text()
        data = xlrd.open_workbook(filePath)
        table = data.sheets()[0]
        webList = import_excel(table)  # list of sites to crawl
        key1 = ui.textEdit_2.toPlainText()
        key2 = ui.textEdit.toPlainText()
        if not webList:
            # Without this guard the outer loop would spin forever doing nothing.
            print("网站列表为空,线程结束!")
            return
        while True:
            for item in webList:
                # The mutex only guards the pause flag; it is not held while
                # crawling, so pause()/resume() never block on a slow request.
                self._wait_while_paused()
                print(item["url"])
                try:
                    reptile(item["url"], key1, key2)
                except Exception as e:
                    # str(e): concatenating the exception object itself raises
                    # TypeError and would kill the thread.
                    reason = str(e)
                    print("线程遇到异常,异常原因:" + reason)
                    try:
                        dingtalk("异常", webhook, msg2 + item["url"] + "线程遇到异常!请尽快联系技术人员排查!" + reason)
                    except Exception as notify_error:
                        # A failed notification must not end the crawl loop.
                        print("线程遇到异常,异常原因:" + str(notify_error))
                time.sleep(2)
class Ui_hwdz(object):
    """Main window UI with three tabs: crawl, image resize/compress, crawl test.

    ``setupUi`` builds the widgets and wires the buttons; the remaining
    methods are the button handlers.
    """

    def setupUi(self, hwdz):
        """Create all widgets on *hwdz* and connect the button signals."""
        hwdz.setObjectName("hwdz")
        hwdz.setFixedSize(699, 749)
        hwdz.setAcceptDrops(True)
        self.test = QtWidgets.QTabWidget(hwdz)
        self.test.setEnabled(True)
        self.test.setGeometry(QtCore.QRect(10, 10, 680, 721))
        self.test.setTabletTracking(False)
        self.test.setAutoFillBackground(False)
        self.test.setObjectName("test")

        # ---- Tab 1: crawl ----
        self.tab = QtWidgets.QWidget()
        self.tab.setObjectName("tab")
        self.label = QtWidgets.QLabel(self.tab)
        self.label.setGeometry(QtCore.QRect(60, 50, 91, 20))
        self.label.setObjectName("label")
        self.lineEdit = QtWidgets.QLineEdit(self.tab)
        self.lineEdit.setGeometry(QtCore.QRect(160, 40, 371, 41))
        self.lineEdit.setObjectName("lineEdit")
        self.pushButton = QtWidgets.QPushButton(self.tab)
        self.pushButton.setGeometry(QtCore.QRect(550, 40, 71, 41))
        self.pushButton.setObjectName("pushButton")
        self.pushButton_2 = QtWidgets.QPushButton(self.tab)
        self.pushButton_2.setGeometry(QtCore.QRect(170, 360, 111, 41))
        self.pushButton_2.setObjectName("pushButton_2")
        self.pushButton_3 = QtWidgets.QPushButton(self.tab)
        self.pushButton_3.setGeometry(QtCore.QRect(320, 360, 111, 41))
        self.pushButton_3.setObjectName("pushButton_3")
        self.textBrowser = QtWidgets.QTextBrowser(self.tab)
        self.textBrowser.setGeometry(QtCore.QRect(20, 470, 631, 192))
        self.textBrowser.setObjectName("textBrowser")
        self.label_2 = QtWidgets.QLabel(self.tab)
        self.label_2.setGeometry(QtCore.QRect(30, 430, 81, 31))
        self.label_2.setObjectName("label_2")
        self.pushButton_8 = QtWidgets.QPushButton(self.tab)
        self.pushButton_8.setGeometry(QtCore.QRect(480, 360, 111, 41))
        self.pushButton_8.setObjectName("pushButton_8")
        self.label_6 = QtWidgets.QLabel(self.tab)
        self.label_6.setGeometry(QtCore.QRect(60, 160, 101, 20))
        self.label_6.setText("")
        self.label_6.setObjectName("label_6")
        self.label_7 = QtWidgets.QLabel(self.tab)
        self.label_7.setGeometry(QtCore.QRect(90, 110, 61, 31))
        self.label_7.setObjectName("label_7")
        self.textEdit = QtWidgets.QTextEdit(self.tab)
        self.textEdit.setGeometry(QtCore.QRect(160, 200, 461, 121))
        self.textEdit.setObjectName("textEdit")
        self.textEdit_2 = QtWidgets.QTextEdit(self.tab)
        self.textEdit_2.setGeometry(QtCore.QRect(160, 110, 461, 71))
        self.textEdit_2.setObjectName("textEdit_2")
        self.label_8 = QtWidgets.QLabel(self.tab)
        self.label_8.setGeometry(QtCore.QRect(90, 200, 61, 31))
        self.label_8.setObjectName("label_8")
        self.test.addTab(self.tab, "")

        # ---- Tab 2: image ----
        self.tab_2 = QtWidgets.QWidget()
        self.tab_2.setObjectName("tab_2")
        self.label_3 = QtWidgets.QLabel(self.tab_2)
        self.label_3.setGeometry(QtCore.QRect(50, 120, 101, 20))
        self.label_3.setObjectName("label_3")
        self.lineEdit_2 = QtWidgets.QLineEdit(self.tab_2)
        self.lineEdit_2.setGeometry(QtCore.QRect(150, 110, 371, 41))
        self.lineEdit_2.setObjectName("lineEdit_2")
        self.pushButton_4 = QtWidgets.QPushButton(self.tab_2)
        self.pushButton_4.setGeometry(QtCore.QRect(540, 110, 71, 41))
        self.pushButton_4.setObjectName("pushButton_4")
        self.pushButton_5 = QtWidgets.QPushButton(self.tab_2)
        self.pushButton_5.setGeometry(QtCore.QRect(200, 320, 111, 41))
        self.pushButton_5.setObjectName("pushButton_5")
        self.pushButton_6 = QtWidgets.QPushButton(self.tab_2)
        self.pushButton_6.setGeometry(QtCore.QRect(350, 320, 101, 41))
        self.pushButton_6.setObjectName("pushButton_6")
        self.label_4 = QtWidgets.QLabel(self.tab_2)
        self.label_4.setGeometry(QtCore.QRect(50, 210, 101, 20))
        self.label_4.setObjectName("label_4")
        self.lineEdit_3 = QtWidgets.QLineEdit(self.tab_2)
        self.lineEdit_3.setGeometry(QtCore.QRect(150, 200, 371, 41))
        self.lineEdit_3.setObjectName("lineEdit_3")
        self.pushButton_7 = QtWidgets.QPushButton(self.tab_2)
        self.pushButton_7.setGeometry(QtCore.QRect(540, 200, 71, 41))
        self.pushButton_7.setObjectName("pushButton_7")
        self.label_5 = QtWidgets.QLabel(self.tab_2)
        self.label_5.setGeometry(QtCore.QRect(80, 410, 72, 15))
        self.label_5.setObjectName("label_5")
        self.textBrowser_2 = QtWidgets.QTextBrowser(self.tab_2)
        self.textBrowser_2.setGeometry(QtCore.QRect(40, 450, 621, 192))
        self.textBrowser_2.setObjectName("textBrowser_2")
        self.lineEdit_4 = QtWidgets.QLineEdit(self.tab_2)
        self.lineEdit_4.setGeometry(QtCore.QRect(150, 40, 141, 41))
        self.lineEdit_4.setObjectName("lineEdit_4")
        self.label_9 = QtWidgets.QLabel(self.tab_2)
        self.label_9.setGeometry(QtCore.QRect(80, 50, 61, 20))
        self.label_9.setObjectName("label_9")
        self.test.addTab(self.tab_2, "")

        # ---- Tab 3: crawl test ----
        self.tab_3 = QtWidgets.QWidget()
        self.tab_3.setObjectName("tab_3")
        self.label_10 = QtWidgets.QLabel(self.tab_3)
        self.label_10.setGeometry(QtCore.QRect(60, 100, 72, 15))
        self.label_10.setObjectName("label_10")
        self.textBrowser_3 = QtWidgets.QTextBrowser(self.tab_3)
        self.textBrowser_3.setGeometry(QtCore.QRect(80, 360, 521, 121))
        self.textBrowser_3.setObjectName("textBrowser_3")
        self.label_11 = QtWidgets.QLabel(self.tab_3)
        self.label_11.setGeometry(QtCore.QRect(90, 330, 72, 15))
        self.label_11.setObjectName("label_11")
        self.lineEdit_5 = QtWidgets.QLineEdit(self.tab_3)
        self.lineEdit_5.setGeometry(QtCore.QRect(130, 80, 441, 51))
        self.lineEdit_5.setObjectName("lineEdit_5")
        self.pushButton_9 = QtWidgets.QPushButton(self.tab_3)
        self.pushButton_9.setGeometry(QtCore.QRect(280, 220, 141, 41))
        self.pushButton_9.setObjectName("pushButton_9")
        self.test.addTab(self.tab_3, "")

        self.retranslateUi(hwdz)
        self.test.setCurrentIndex(0)
        QtCore.QMetaObject.connectSlotsByName(hwdz)

        # ---- Signals and slots ----
        self.th = Thread()
        # Workers replaced by a restart are kept here so they are not destroyed
        # while still running (see start()).
        self._retired_threads = []
        # Hidden Tk root: the tkinter file dialogs used by the browse handlers
        # would otherwise pop up an empty Tk window.
        root = tk.Tk()
        root.withdraw()
        self.pushButton.clicked.connect(self.browse)  # browse for the site workbook
        self.pushButton_2.clicked.connect(self.start)  # start / restart crawling
        self.pushButton_3.clicked.connect(self.stop)  # pause
        self.pushButton_8.clicked.connect(self.conti)  # continue
        self.pushButton_4.clicked.connect(self.browseImage)  # browse for the input image
        self.pushButton_5.clicked.connect(self.exchange)  # resize the image
        self.pushButton_7.clicked.connect(self.browseSave)  # browse for the output folder
        self.pushButton_6.clicked.connect(self.onlyCompress)  # compress only
        self.pushButton_9.clicked.connect(self.testDemo)  # run the crawl test
        self.pushButton_8.setEnabled(False)
        self.pushButton_3.setEnabled(False)
        self.pushButton_6.setEnabled(False)

    def retranslateUi(self, hwdz):
        """Set every user-visible caption and the initial rich-text contents."""
        _translate = QtCore.QCoreApplication.translate
        hwdz.setWindowTitle(_translate("hwdz", "Form"))
        self.label.setText(_translate("hwdz", "文件输入路径"))
        self.pushButton.setText(_translate("hwdz", "浏览"))
        self.pushButton_2.setText(_translate("hwdz", "开始"))
        self.pushButton_3.setText(_translate("hwdz", "暂停"))
        self.textBrowser.setHtml(_translate("hwdz",
            "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\" \"http://www.w3.org/TR/REC-html40/strict.dtd\">\n"
            "<html><head><meta name=\"qrichtext\" content=\"1\" /><style type=\"text/css\">\n"
            "p, li { white-space: pre-wrap; }\n"
            "</style></head><body style=\" font-family:\'SimSun\'; font-size:9pt; font-weight:400; font-style:normal;\">\n"
            "<p style=\"-qt-paragraph-type:empty; margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;\"><br /></p></body></html>"))
        self.label_2.setText(_translate("hwdz", "信息:"))
        self.pushButton_8.setText(_translate("hwdz", "继续"))
        self.label_7.setText(_translate("hwdz", "内容栏目"))
        self.textEdit.setHtml(_translate("hwdz",
            "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\" \"http://www.w3.org/TR/REC-html40/strict.dtd\">\n"
            "<html><head><meta name=\"qrichtext\" content=\"1\" /><style type=\"text/css\">\n"
            "p, li { white-space: pre-wrap; }\n"
            "</style></head><body style=\" font-family:\'SimSun\'; font-size:9pt; font-weight:400; font-style:normal;\">\n"
            "<p style=\"-qt-paragraph-type:empty; margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;\"><br /></p></body></html>"))
        self.textEdit_2.setHtml(_translate("hwdz",
            "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\" \"http://www.w3.org/TR/REC-html40/strict.dtd\">\n"
            "<html><head><meta name=\"qrichtext\" content=\"1\" /><style type=\"text/css\">\n"
            "p, li { white-space: pre-wrap; }\n"
            "</style></head><body style=\" font-family:\'SimSun\'; font-size:9pt; font-weight:400; font-style:normal;\">\n"
            "<p style=\"-qt-paragraph-type:empty; margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;\"><br /></p></body></html>"))
        self.label_8.setText(_translate("hwdz", "关键词:"))
        self.test.setTabText(self.test.indexOf(self.tab), _translate("hwdz", "爬取"))
        self.label_3.setText(_translate("hwdz", "图片输入路径"))
        self.pushButton_4.setText(_translate("hwdz", "浏览"))
        self.pushButton_5.setText(_translate("hwdz", "修改"))
        self.pushButton_6.setText(_translate("hwdz", "仅压缩"))
        self.label_4.setText(_translate("hwdz", "图片输出路径"))
        self.pushButton_7.setText(_translate("hwdz", "浏览"))
        self.label_5.setText(_translate("hwdz", "信息:"))
        self.label_9.setText(_translate("hwdz", "指定宽度"))
        self.test.setTabText(self.test.indexOf(self.tab_2), _translate("hwdz", "图片 "))
        self.label_10.setText(_translate("hwdz", "链接地址:"))
        self.label_11.setText(_translate("hwdz", "信息:"))
        self.pushButton_9.setText(_translate("hwdz", "开始测试"))
        self.test.setTabText(self.test.indexOf(self.tab_3), _translate("hwdz", "测试"))

    # ---- Button handlers ----

    def browse(self):
        """Ask for the site-list workbook and put its path into the input field."""
        file_path = filedialog.askopenfilename(initialdir = 'D:/',title = '请选择文件',filetypes = [('*.xlsx',"*.xls")])
        self.lineEdit.setText(file_path)

    def start(self):
        """Validate the inputs, then start (or restart) the crawler thread."""
        filePath = self.lineEdit.text()
        if filePath == "" or self.textEdit_2.toPlainText() == "" or self.textEdit.toPlainText() == "":
            QMessageBox.critical(None, "错误", "文件选择或输入框输入有误,请检查是否正确与是否填写!")
            return
        if self.pushButton_2.text() == "重新开始":
            # The previous worker is only paused, not finished. Qt's QThread
            # documentation warns that deleting a running QThread crashes the
            # program, so keep a reference instead of dropping the object.
            # NOTE(review): the retired worker stays parked forever; a proper
            # stop request on Thread would let it exit.
            self._retired_threads.append(self.th)
            self.th = Thread()
            self.pushButton_8.setEnabled(False)
            self.pushButton_3.setText("暂停")
            self.pushButton_3.setEnabled(True)
        self.th.start()
        self.pushButton_2.setText("正在爬取")
        # Lock the inputs while crawling.
        self.pushButton.setEnabled(False)
        self.pushButton_2.setEnabled(False)
        self.lineEdit.setEnabled(False)
        self.pushButton_3.setEnabled(True)
        self.pushButton_8.setEnabled(True)

    def stop(self):
        """Pause the crawler thread and unlock the inputs."""
        self.th.pause()
        self.pushButton_3.setText("已暂停")
        self.pushButton_3.setEnabled(False)
        self.pushButton_2.setText("重新开始")
        self.pushButton_2.setEnabled(True)
        self.pushButton_8.setEnabled(True)
        self.pushButton.setEnabled(True)
        self.lineEdit.setEnabled(True)

    def conti(self):
        """Resume a paused crawler thread and lock the inputs again."""
        self.th.resume()
        self.pushButton_2.setText("正在爬取")
        self.pushButton_2.setEnabled(False)
        self.pushButton_3.setText("暂停")
        self.pushButton_3.setEnabled(True)
        self.pushButton_8.setEnabled(False)
        self.lineEdit.setEnabled(False)

    def browseImage(self):
        """Ask for the input image and put its path into the image field."""
        file_path = filedialog.askopenfilename(
            initialdir='D:/', title='请选择文件')
        self.lineEdit_2.setText(file_path)

    def browseSave(self):
        """Ask for the output folder and put its path into the output field."""
        folder = filedialog.askdirectory()
        self.lineEdit_3.setText(folder)

    def exchange(self):
        """Resize the chosen image to the requested width, keeping the aspect ratio.

        The width defaults to 720 when the field is empty. The result is saved
        as ``<timestamp>.png`` in the output folder (``D:/`` when none is set).
        """
        invalid_input = "文件选择或输入框输入有误,请检查是否正确与是否填写!"
        self.textBrowser_2.clear()
        file_path = self.lineEdit_2.text()
        save_path = self.lineEdit_3.text()
        input_width = self.lineEdit_4.text()
        if file_path == "":
            QMessageBox.critical(None, "错误", invalid_input)
            return
        # A non-numeric or non-positive width used to crash the handler.
        try:
            target_width = 720 if input_width == "" else int(input_width)
        except ValueError:
            target_width = 0
        if target_width <= 0:
            QMessageBox.critical(None, "错误", invalid_input)
            return
        try:
            with Image.open(file_path) as img:
                w = img.width
                h = img.height
                # Never request a zero-pixel height for extremely wide images.
                target_height = max(1, int((h * target_width) / w))
                # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the
                # same filter under its current name.
                newImg = img.resize((target_width, target_height), Image.LANCZOS)
        except OSError:
            # Missing file or not a readable image.
            QMessageBox.critical(None, "错误", invalid_input)
            return
        ts = calendar.timegm(time.gmtime())
        if save_path == "":
            out_path = "D:/" + str(ts) + ".png"
        else:
            out_path = save_path + "/" + str(ts) + ".png"
        newImg.save(out_path, quality=20)
        info1 = "修改成功!存放路径为:" + out_path + "<br>"
        info2 = "原图片宽度:" + str(w) + " 高度:" + str(h) + "<br>" + "修改后宽度:" + str(target_width) + " 高度:" + str(target_height)
        self.textBrowser_2.setText(info1 + info2)
        QMessageBox.about(None, "成功", "图片修改成功!")

    def onlyCompress(self):
        """Re-save the chosen image with ``quality=20`` without resizing it."""
        file_path = self.lineEdit_2.text()
        save_path = self.lineEdit_3.text()
        ts = calendar.timegm(time.gmtime())
        if file_path == "":
            return
        if save_path == "":
            out_path = "D:/" + str(ts) + ".png"
        else:
            out_path = save_path + "/" + str(ts) + ".png"
        with Image.open(file_path) as img:
            img.save(out_path, quality=20)
        # Report the result in the info box. Writing it into the output-folder
        # field (as before) turned that field into an invalid folder for the
        # next save.
        self.textBrowser_2.setText("压缩成功!存放路径为:" + out_path)

    # Crawl test
    def testDemo(self):
        """Run the crawl test against the URL typed into the test tab."""
        testUrl = self.lineEdit_5.text()
        if testUrl == "" or testUrl[0] != "h":
            QMessageBox.critical(None, "错误", "请输入正确的地址!")
            # Do not test an address that was just rejected.
            return
        test_reptile(testUrl)
# Parse the worksheet that lists the sites to crawl.
def import_excel(excel):
    """Return one dict per data row of *excel*.

    *excel* must expose ``nrows`` and ``cell_value(row, col)``. Columns 0, 1
    and 2 are stored under the keys ``"name"``, ``"url"`` and ``"remarks"``.
    Row 0 is treated as the header and skipped; an empty or header-only sheet
    yields an empty list.
    """
    # Starting at row 1 replaces the former build-then-pop(0), which raised
    # IndexError on a sheet without any rows.
    return [
        {
            "name": excel.cell_value(row, 0),
            "url": excel.cell_value(row, 1),
            "remarks": excel.cell_value(row, 2),
        }
        for row in range(1, excel.nrows)
    ]


# ---- Writing to the Excel record file ----
import xlwt #写入excel
import xlrd #读excel
from xlutils.copy import copy #读和写中间转换器
def create_excel(words):
    """Create ``record.xlsx`` with a header row followed by one row per item.

    The keys of ``words[0]`` become the header; every item of *words* is
    written using those keys as column order.
    NOTE(review): xlwt appears to produce the legacy .xls format, so the
    ``.xlsx`` extension may not match the file content - confirm.
    """
    work_book = xlwt.Workbook(encoding="utf-8")
    sheet = work_book.add_sheet('record')

    # Header row: the keys of the first record.
    head = list(words[0].keys())
    for column, caption in enumerate(head):
        sheet.write(0, column, caption)

    # Data rows start right below the header.
    for row, item in enumerate(words, start=1):
        for column, caption in enumerate(head):
            sheet.write(row, column, item[caption])

    work_book.save("record.xlsx")
    ui.th.sleep(1)
# Append records to the existing Excel file.
def append_excel(words):
    """Append every item of *words* below the rows already in ``record.xlsx``.

    The header row of the first sheet decides which key goes into which column.
    """
    source_book = xlrd.open_workbook("record.xlsx")
    first_sheet_name = source_book.sheet_names()[0]
    source_sheet = source_book.sheet_by_name(first_sheet_name)
    first_free_row = source_sheet.nrows
    heads = source_sheet.row_values(0)

    # xlrd workbooks are read-only; copy() yields a writable workbook.
    writable_book = copy(source_book)
    target_sheet = writable_book.get_sheet(0)
    for offset, item in enumerate(words):
        for column, key in enumerate(heads):
            target_sheet.write(first_free_row + offset, column, item[key])

    writable_book.save("record.xlsx")
    ui.th.sleep(1)
# Crawler function.
def reptile(url, key1, key2):
    """Scan the links on *url* and report those matching both keyword lists.

    A link matches when its text or title contains at least one keyword from
    *key1* and at least one from *key2*, and its text is not yet recorded.
    Each match is shown in the UI log, pushed through ``dingtalk`` and finally
    written to ``record.xlsx``.

    NOTE(review): ``ui.textBrowser`` is updated directly although the visible
    caller is ``Thread.run``; Qt widgets are normally meant to be touched from
    the GUI thread only - consider emitting a signal instead.
    """
    req = Request(url=url, headers=headers)
    # Close the HTTP response as soon as the page has been parsed.
    with urlopen(req) as html:
        soup = BeautifulSoup(html, 'html.parser', from_encoding="gbk")
    # Every link lives in an <a> tag.
    data = soup.find_all('a')
    tables = []
    keyList1 = keyList(key1)
    keyList2 = keyList(key2)
    for item in data:
        title = item.get('title')
        h = item.get('href')
        text = item.string
        if title is None:
            title = "没有标题"
        if text is None:
            text = "没有文本"
        # Anchors without a real target cannot be reported; a missing or
        # empty href previously blew up on h[0].
        if not h or h == 'javascript:;' or h == '#':
            continue
        if not (isExit(keyList1, text, title) and isExit(keyList2, text, title)):
            continue
        try:
            if recordExit(text):
                continue
            nowTime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            # Only a real scheme marks an absolute link; testing just the
            # first letter treated relative links such as "help.html" as
            # absolute.
            if h.startswith(("http://", "https://")):
                href = h
            else:
                href = urlCon(url, h)
            aTag = {
                "time": nowTime,
                "title": title,
                "text": text,
                "href": href,
            }
            print(aTag)
            tables.append(aTag)
            string = nowTime+":"+title+"url:"+aTag["href"]
            ui.textBrowser.append(string)
            message = msg1+"栏目内容:"+key1+"\r\n关键词:"+key2+"\r\n时间:"+nowTime+"\r\n标题:"+title+"\r\n内容:"+text+"\r\n链接:"+aTag["href"]+"\r\n温馨提醒:链接可能不准确,如果错误请访问原站查看,地址:"+url
            dingtalk("提醒", webhook, message)
            time.sleep(3)
        except Exception as e:
            # str(e): concatenating the exception object itself raises
            # TypeError inside the handler.
            reason = str(e)
            print(url+"爬虫出现异常,原因:"+reason)
            dingtalk("异常", webhook, msg2+url+"爬虫遇到异常!请尽快联系技术人员排查!"+reason)
    if tables:
        # Persist the new matches: append when the record file exists,
        # otherwise create it.
        if os.path.exists("record.xlsx"):
            append_excel(tables)
        else:
            create_excel(tables)
    time.sleep(2)
# Keyword check.
def isExit(a, b, c):
    """Return True when any keyword of *a* occurs in *b* or in *c*."""
    return any(keyword in b or keyword in c for keyword in a)
# Build the keyword list.
def keyList(str):
    """Split a comma-separated keyword string into a list of clean keywords.

    Both the ASCII comma and the fullwidth comma (U+FF0C) act as separators.
    Surrounding whitespace is stripped and empty entries are dropped, so a
    trailing comma no longer yields an empty keyword that matches every link.

    The parameter keeps its original name for backward compatibility even
    though it shadows the builtin ``str``.
    """
    normalized = str.replace("\uff0c", ",")
    pieces = (piece.strip() for piece in normalized.split(","))
    return [piece for piece in pieces if piece]
# URL joining.
def urlCon(url, href):
    """Resolve *href* against the page address *url* and return the result.

    Uses the standard URL resolution rules, so root-relative links
    (``/x``), parent references (``../x``) and page URLs without a path are
    handled. The former text replacement removed every occurrence of the last
    path segment from the page URL, which could strip the host name.
    """
    # Local import keeps this helper self-contained.
    from urllib.parse import urljoin

    return urljoin(url, href)
# DingTalk message push.
def dingtalk(msg, webhook, message, timeout=10):
    """POST a text message to *webhook* and return the response body.

    Args:
        msg: Stored as the ``title`` field of the payload.
        webhook: URL that receives the JSON payload.
        message: Stored as the ``content`` field of the payload.
        timeout: Seconds to wait for the server. Without a timeout a stalled
            connection would block the calling thread indefinitely.

    Raises:
        requests.exceptions.RequestException: the request failed or timed out.
    """
    # Named differently from the module-level crawler headers to avoid
    # shadowing them.
    request_headers = {'Content-Type': 'application/json; charset=utf-8'}
    payload = {
        'msgtype': 'text',
        'text': {
            'title': msg,
            "content": message,
            'messageUrl': 'http://请求IP+端口号',
        },
        'at': {
            'atMobiles': ['群消息@的用户1', '群消息@的用户2'],
            'isAtAll': False,
        },
    }
    response = requests.post(
        webhook,
        headers=request_headers,
        data=json.dumps(payload),
        timeout=timeout,
    )
    return response.text
# Crawl test.
def test_reptile(url):
    """Fetch and parse *url* twice and report in the test tab whether it worked.

    The page is requested two times, one second apart, exactly as before; the
    outcome is appended to ``ui.textBrowser_3``.
    """
    request_headers = {
        'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36",
    }
    ui.textBrowser_3.clear()
    try:
        for attempt in range(2):
            if attempt:
                time.sleep(1)
            req = Request(url=url, headers=request_headers)
            # Close each response once the page has been parsed.
            with urlopen(req) as html:
                soup = BeautifulSoup(html, 'html.parser', from_encoding="gb18030")
            soup.find_all('a')
    except Exception as e:
        # "except Exception" instead of a bare except, so KeyboardInterrupt
        # and SystemExit are no longer swallowed; the reason goes to the console.
        print(str(e))
        ui.textBrowser_3.append(url+"\r\n有异常!")
    else:
        ui.textBrowser_3.append(url+"\r\n无异常!")
# Check whether a record already exists.
def recordExit(text):
    """Return True when *text* is found in column 2 of ``record.xlsx``.

    Returns False when the record file does not exist yet. The former
    else-branch built an xlwt workbook that was never saved; that dead code
    has been removed.
    """
    if not os.path.exists("record.xlsx"):
        return False
    table = xlrd.open_workbook("./record.xlsx").sheets()[0]
    return any(text == table.cell_value(row, 2) for row in range(table.nrows))
if __name__ == "__main__":
    # Script entry point: build the window and run the Qt event loop.
    # `ui` has to remain a module-level name because the worker thread and
    # the helper functions reach the widgets through it.
    app = QtWidgets.QApplication(sys.argv)
    MainWindow = QtWidgets.QMainWindow()
    ui = Ui_hwdz()
    ui.setupUi(MainWindow)
    MainWindow.show()
    exit_code = app.exec_()
    sys.exit(exit_code)