Python 做爬虫的效率很高。这里再用 PyQt 做一个界面,写了一个可以下载任意网页上所有图片的程序。
# Image downloader: a small PyQt4 window that fetches a web page and saves
# every jpg/gif/png referenced by a src="..." attribute into a chosen folder.
# Targets Python 2 (urllib.urlopen, unicode, the thread/urlparse modules).
import os
import re
import sys
import thread
import threading
import urllib
import urlparse

from PyQt4 import QtCore
from PyQt4 import QtGui

# Group 1 is the image URL, group 2 its extension (jpg, gif or png).
_IMG_SRC_RE = re.compile(r'src="(.*?\.(jpg|gif|png))"')


class Main_QWidget(QtGui.QWidget):
    """Main window: URL field, destination-folder field and a status label."""

    # Carries status text to the label. setTag() is called from the download
    # thread, and Qt widgets must only be touched from the GUI thread, so the
    # text travels through a signal instead of a direct QLabel.setText() call.
    tagChanged = QtCore.pyqtSignal('QString')

    def __init__(self):
        QtGui.QWidget.__init__(self)
        self.website = QtGui.QLineEdit(self)    # URL input
        self.filepath = QtGui.QLineEdit(self)   # destination folder input
        self.selectpathbutton = QtGui.QPushButton('select', self)
        self.tag = QtGui.QLabel('ready', self)  # status label
        downloadbutton = QtGui.QPushButton('download', self)

        grid = QtGui.QGridLayout()
        grid.addWidget(self.website, 0, 0, 1, 3)
        grid.addWidget(self.filepath, 1, 0, 1, 2)
        grid.addWidget(self.selectpathbutton, 1, 2)
        grid.addWidget(downloadbutton, 2, 0)
        grid.addWidget(self.tag, 2, 2)
        self.resize(300, 300)
        self.setLayout(grid)
        self.setWindowTitle("download image")

        self.connect(self.selectpathbutton, QtCore.SIGNAL('clicked()'),
                     self.selectPath)
        self.connect(downloadbutton, QtCore.SIGNAL('clicked()'), download)
        self.tagChanged.connect(self.tag.setText)

    def getWebsite(self):
        """Return the current content of the URL field."""
        return self.website.text()

    def getFilePath(self):
        """Return the current content of the destination-folder field."""
        return self.filepath.text()

    def setTag(self, downloadtag):
        """Show *downloadtag* in the status label.

        The text is emitted through ``tagChanged`` rather than written to the
        label directly, because this method is also called from the worker
        thread.
        """
        self.tagChanged.emit(downloadtag)

    def selectPath(self):
        """Let the user pick a folder, put it in the path field, return it."""
        fileName = QtGui.QFileDialog.getExistingDirectory(self, 'Open')
        self.filepath.setText(fileName)
        return fileName


class mythread(threading.Thread):
    """Worker thread that downloads all images of one page.

    Website  -- URL of the page to scan.
    FilePath -- folder the images are written to.
    """

    def __init__(self, Website, FilePath):
        threading.Thread.__init__(self)
        self.Website = Website
        self.FilePath = FilePath

    def run(self):
        """Fetch the page, download its images, then update the status."""
        try:
            html = getHtml(self.Website)
        except IOError as exc:
            # Without this the thread would die silently and the status
            # label would stay on 'go' forever.
            print(exc)
            main.setTag('error')
            return
        print(self.Website)
        print(self.FilePath)
        # The page URL is passed along so relative image links can be resolved.
        getImg(html, self.FilePath, self.Website)
        main.setTag('finish')  # download completed


def download():
    """Slot of the download button: start a worker for the entered URL."""
    # The line edits may hand back Qt string objects; convert them to plain
    # Python strings before giving them to urllib / os.path.
    Website = unicode(main.getWebsite()).strip().encode('utf-8')
    FilePath = unicode(main.getFilePath())
    main.setTag('go')
    t = mythread(Website, FilePath)  # run the download off the GUI thread
    t.start()


def getHtml(url):
    """Return the raw content of the page at *url*.

    Raises IOError if the URL cannot be opened.
    """
    response = urllib.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def getImg(html, path, base_url=None):
    """Download every image referenced in *html* into the folder *path*.

    html     -- page source to scan for src="...(jpg|gif|png)" attributes.
    path     -- destination folder; files are named <index>.<extension>.
    base_url -- optional URL of the page; when given, relative image links
                are resolved against it before downloading.

    Progress is shown as "<index>/<total>" in the status label. An image
    that cannot be fetched is reported on stdout and skipped, so one broken
    link does not abort the remaining downloads.
    """
    imglist = _IMG_SRC_RE.findall(html)
    total = len(imglist)
    print(total)
    for x, imgurl in enumerate(imglist):
        print(imgurl)
        src, ext = imgurl
        main.setTag(str(x) + '/' + str(total))  # progress in the status label
        if base_url:
            src = urlparse.urljoin(base_url, src)
        xpath = os.path.join(path, '%d.%s' % (x, ext))
        try:
            urllib.urlretrieve(src, xpath)
        except IOError as exc:
            print(exc)
    print('finish--------')


if __name__ == "__main__":
    app = QtGui.QApplication(sys.argv)
    main = Main_QWidget()
    main.show()
    sys.exit(app.exec_())