python 网站自动抠图
翻东西时,突然找到了以前用 Python 写的一个自动下载网站图片的脚本。
很好用。如果要自己使用,只要将脚本中的保存路径(filename)改成自己的目录即可!
picdown.py
#! /usr/bin/python # -*- coding: utf-8 -*- import urllib2, openurl, sys, urllib, os filename = r"/home/hunting/xiaolong/Yunio/py/imagesaved" imageNum = 0 def shiftDir(path): try: os.chdir(path) except: print "cant open file or the file is ERROR" sys.exit(1) def download(filename): try: print"正在下载... %s" % filename.split("/")[-1] urllib.urlretrieve(filename, filename.split("/")[-1]) return 1 except: print "%s 下载失败!" % filename.split("/")[-1] return 0 if len(sys.argv) < 2: print "you can print --help for more infomation!" sys.exit(-1) elif sys.argv[1].startswith("--help"): print ‘‘‘输入:picture [url] 来下载图片‘‘‘ else: if len(sys.argv) == 2: shiftDir(filename) else: shiftDir(sys.argv[2]) saveimage = [] url = sys.argv[1] info = urllib2.urlopen(url).read() message = openurl.URL() message.feed(info) for imag in message.images: if imag.endswith(".jpg") or imag.endswith(".png") or imag.endswith(".gif"): if imag.startswith("http:"): saveimage.append(imag) else: imag = r"http://" + imag saveimage.append(imag) for im in saveimage: flag = download(im) if flag: imageNum = imageNum + 1 print "the total images download %d pages" % imageNum
#! usr/bin/python import urllib2 from sgmllib import SGMLParser class URL (SGMLParser): def reset(self): SGMLParser.reset(self) self.images = [] def start_img(self, temp): img = [v for k, v in temp] if img: self.images.extend(img)