下载python标准库--python

下载python标准库--python
  1 #coding:utf-8
  2 import urllib2
  3 import os,sys
  4 from BeautifulSoup import BeautifulSoup          # For processing HTML
  5 from bs4 import BeautifulSoup
  6 class BookSave():
  7     ‘‘‘
  8     dir:html文件保存目录  url:index.html目录 static_url:js、css所在目录的上级目录
  9     distinguish:用来区分相同tag.name   dis_key:所需的tag属性  key1:所取tag
 10     key2:tag属性   key3:tag属性值
 11     ‘‘‘
 12     def __init__(self,dir,url,static_url,distinguish,dis_key,key1,key2,key3):
 13         self.dir=dir
 14         self.url = url
 15         self.static_url = static_url
 16         self.distinguish = distinguish
 17         self.dis_key = dis_key
 18         self.key1 = key1
 19         self.key2 = key2
 20         self.key3 = key3
 21 
 22     def AddUrl(self):
 23         if self.dir != ‘‘:
 24             list = os.listdir(self.dir)  #列出目录下的所有文件和目录
 25             for line in list:
 26                 if os.path.isdir(line):
 27                     continue
 28                 elif os.path:
 29                     self.JieXiCsss(line)
 30                     self.JieXiJs(line)
 31 
 32     def JieXiCsss(self,file):
 33         filePath = os.path.join(self.dir,file)
 34         print filePath
 35         fp = open(filePath)
 36         soup = BeautifulSoup(fp)
 37         head = soup.head        
 38         tags = head.findAll(link)#,{‘rel‘:‘stylesheet‘}
 39         if tags != []:
 40             for item in tags:
 41                 try:
 42                     item[href] = self.static_url + item[href]
 43                     print item[href]    
 44                 except KeyError:
 45                     continue            
 46         else :
 47             print tags,filePath
 48         self.SaveHtml(soup,filePath)
 49 
 50     def JieXiJs(self,file):
 51         filePath = os.path.join(self.dir,file)
 52         fp = open(filePath)
 53         soup = BeautifulSoup(fp)
 54         head = soup.head        
 55         tags = head.findAll(script)#,{‘rel‘:‘stylesheet‘}
 56         if tags != []:
 57             for item in tags:
 58                 try:
 59                     item[src] = self.static_url + item[src]
 60                     print item[src]
 61                     self.SaveHtml(soup,filePath)
 62                 except KeyError:
 63                     continue
 64         else :
 65             print tags,filePath
 66         self.SaveFile(soup,filePath)
 67 
 68     def SaveFile(self,soup,file):
 69         html = str(soup)
 70         with open(file,wb) as code:
 71             code.write(html)
 72 
 73     def IsNullArr(self,Arr):
 74         if Arr != []:
 75             return Arr
 76         else:
 77             print array is null
 78 
 79     def DownLoadHtml(self,arr):
 80         tags = bs.IsNullArr(arr)
 81         for item in tags:
 82             liName = item.parent.name
 83             if any(liName in s for s in self.distinguish):
 84                 continue
 85             else:
 86                 htmlUrl = self.url + item[self.dis_key]
 87                 print htmlUrl
 88                 fileName = os.path.join(self.dir,item[self.dis_key])
 89                 print saving: + htmlUrl
 90                 self.SaveHtml(fileName,htmlUrl)
 91 
 92     def SaveHtml(self,fileName,htmlUrl):
 93         f = urllib2.urlopen(htmlUrl)
 94         html = f.read()
 95         with open(fileName,"wb") as code:
 96             code.write(html)#.decode(‘utf-8‘)
 97                     
 98     def GetSearchResult(self):
 99         doc = urllib2.urlopen(self.url)
100         soup = BeautifulSoup(doc)
101         soup.originalEncoding
102         tag = soup.findAll(self.key1,{self.key2:self.key3})
103         return tag
104 
105     def SplitString(self,source,sep):
106         return source.strip().split(/)
107 
108     def CreateDir(self):
109         if not os.path.exists(self.dir):
110             os.makedirs(os.path.join(self.dir))
if __name__ == '__main__':
    # Mirror the Python 2 standard-library documentation into a local folder.
    urls = 'http://docs.python.org/2/library/'
    static_url = 'http://docs.python.org/2/'
    dirs = 'E:/demo/PythonLib1/'
    # Select <a class="reference internal"> tags, read their href, and skip
    # those whose parent tag name matches 'p'.
    bs = BookSave(dirs, urls, static_url, 'p', 'href', 'a', 'class', 'reference internal')
    bs.CreateDir()
    # Save the index page first, then every page it links to.
    fileName = os.path.join(dirs, 'index.html')
    htmlUrl = urls + 'index.html'
    bs.SaveHtml(fileName, htmlUrl)
    tags = bs.GetSearchResult()
    #print tags
    bs.DownLoadHtml(tags)
    # Finally rewrite css/js references in the saved files.
    bs.AddUrl()
下载python标准库--python

下载python标准库--python,布布扣,bubuko.com

下载python标准库--python

上一篇:蓝桥杯 第三届C/C++预赛真题(7) 放棋子(水题)


下一篇:ubuntu memcached安装和java中memcached使用demo