场景说明:
1、定时从网站下载程序文件;
2、定时清理文件,以免占用磁盘空间过大;
程序功能:
1、使用urllib2、urllib模块从网站抓取数据,并下载到指定路径;
2、为避免重复下载,在下载前做数据对比;
3、使用多线程,一个实现下载的功能,另一个实现清理功能;
4、每24小时执行一次。
import urllib2,urllib
import re
import os,sys
import time
import datetime
import threading
# Proxy settings used when building the urllib2 opener: proxy credentials
# plus the proxy server URL.
# NOTE(review): the credentials are hard-coded in the source -- consider
# loading them from configuration or the environment instead.
proxy_info={'user':'user', 'password':'xxxxxx' , 'server':'http://xxx:8080'}
# Base URL of the page listing the downloadable files; a file name is
# appended to it to build each download URL.
url1 = "http://xxx.com/"
# Local directory that receives the downloads and is passed to the cleanup
# thread as the directory to scan.
path=r'x:\download'
# Condition object shared by the download and cleanup threads; each thread
# notifies the other and then waits on it after finishing a pass.
con=threading.Condition()
def downloadpatch(path,url1):
if con.acquire():
while 1:
print ' start thread of downloadpatch'
print 'present time is: ',datetime.datetime.now()
passmgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
passmgr.add_password(None, proxy_info['server'] , proxy_info['user'], proxy_info['password'])
auth = urllib2.ProxyBasicAuthHandler(passmgr)
opener = urllib2.build_opener(urllib2.ProxyHandler({'http':proxy_info['server']}) , auth)
urllib2.install_opener(opener)
proname = urllib2.urlopen(url1)
text=proname.read()
print 'connect to website successfully'
#print text
name=re.findall('HREF="(\d+xdat.exe)"',text,re.IGNORECASE)
#name=re.findall('HREF="(readme.txt)"',text,re.IGNORECASE)
print 'the following files are all patchs in the website: '
print name
files=os.listdir(path)
for i in files:
for x in name:
if i==x:
name.remove(x)
if len(name)>0:
print 'the following files are need to download:'
print name
print 'please wait......'
for i in name:
f=open(path+'\\'+i,'wb')
downpro=urllib2.urlopen(url1+i)
while 1:
data=downpro.read(1024)
if not len(data):
break
f.write(data)
f.close()
print '%s files have download!!!'%i
f1=open(path+'\\'+'log'+'\\'+'log.txt','a')
f1.write(str(datetime.datetime.now())+' ')
f1.write('%s files have download!!!'%i)
f1.write('\n')
f1.close()
else:
print 'no files have to download'
proname.close()
print '--------------------------------------------'
con.notify()
con.wait()
time.sleep(24*60*60)
#time.sleep(10)
def deletepatch(yourpath):
if con.acquire():
while 1:
print ' starting thread of delete files'
print 'present time is :',datetime.datetime.now()
pathlist=os.listdir(yourpath)#list all files
for i in range(len(pathlist)):#counts
source=yourpath+'\\'+pathlist[i]#path of a file
if os.path.isfile(source):#whether is file
m=time.localtime(os.stat(source).st_ctime)# create time of file
endtime=datetime.datetime.now()# now time
startime=datetime.datetime(m.tm_year,m.tm_mon,m.tm_mday,m.tm_hour,m.tm_min,m.tm_sec)
#translate the time
mydays=(endtime-startime).days
if mydays>=7:#if time is over 7 days
os.remove(source)# remove the file
print 'File',source,'have been deleted'
f2=open(path+'\\'+'log'+'\\'+'log.txt','a')
f2.write(str(datetime.datetime.now()))
f2.write('File',source,'have been deleted')
f2.write('\n')
f2.close()
else:
print 'File',source,'is now useful for us'
else:
print 'File',source,'is not execute program'
print '--------------------------------------------'
con.notify()
con.wait()
time.sleep(24*60*60)
#time.sleep(10)
if __name__=='__main__':
try:
t1=threading.Thread(None,target=downloadpatch,args=(path,url1))
t1.start()
t2=threading.Thread(None,target=deletepatch,args=(path,))
t2.start()
except Exception,e:
print e