python 网站自动抠图

python 网站自动抠图


翻东西, 突然找到了以前用python写的自动下载网站图片的一个脚本

很好用。使用前只需把脚本里的默认保存目录(变量 filename)改成自己的路径即可;也可以在运行时通过第二个命令行参数直接指定保存目录!


picdown.py

#! /usr/bin/python 
# -*- coding: utf-8 -*-

import urllib2, openurl, sys, urllib, os

filename = r"/home/hunting/xiaolong/Yunio/py/imagesaved"
imageNum = 0

def shiftDir(path):
	"""Make `path` the current working directory, or exit.

	path -- directory to switch into with os.chdir().

	When the directory cannot be entered, an error message is printed
	and the process terminates with exit status 1.
	"""
	try:
		os.chdir(path)
	except OSError:
		# Only a failed chdir is handled here; a bare except would also
		# swallow KeyboardInterrupt and unrelated programming errors.
		print("cant open file or the file is ERROR")
		sys.exit(1)

def download(filename):
	"""Fetch one URL into the current working directory.

	filename -- despite its name, the URL to fetch. The text after its
	            last "/" is used as the local file name.

	Returns 1 when the transfer succeeded and 0 when it failed. A
	progress line is printed before the transfer and a failure line
	when it does not succeed.
	"""
	localname = filename.split("/")[-1]
	try:
		print("正在下载...  %s" % localname)
		urllib.urlretrieve(filename, localname)
		return 1
	except Exception:
		# Best effort: any ordinary error marks this one file as failed.
		# Unlike a bare except, KeyboardInterrupt and SystemExit are not
		# caught, so Ctrl-C still aborts the whole run.
		print("%s 下载失败!" % localname)
		return 0

# Command-line entry point.
#   <script> <url> [directory]  download every .jpg/.png/.gif referenced
#                               by the <img> tags of <url> into [directory]
#                               (or into the default `filename` directory)
#   <script> --help             print a short usage message
if len(sys.argv) < 2:
	print("you can print --help for more infomation!")
	sys.exit(-1)
elif sys.argv[1].startswith("--help"):
	# Plain ASCII triple quotes; the typographic quotes that were pasted
	# here are not valid Python and made the whole script a SyntaxError.
	print('''输入:picture [url] 
	来下载图片''')
else:
	# Imported here because the file's import line does not provide it.
	import urlparse

	# Second argument, when present, overrides the default save directory.
	if len(sys.argv) == 2:
		shiftDir(filename)
	else:
		shiftDir(sys.argv[2])

	url = sys.argv[1]
	response = urllib2.urlopen(url)
	try:
		info = response.read()
	finally:
		# Release the connection even if reading fails.
		response.close()

	message = openurl.URL()
	message.feed(info)
	# Flush whatever markup the parser is still buffering.
	message.close()

	saveimage = []
	seen = set()
	for imag in message.images:
		if not imag.endswith((".jpg", ".png", ".gif")):
			continue
		# urljoin returns absolute http/https URLs unchanged and resolves
		# relative or protocol-relative references against the page URL,
		# instead of blindly prefixing "http://".
		imag = urlparse.urljoin(url, imag)
		# Skip repeats so the same file is not fetched and counted twice.
		if imag not in seen:
			seen.add(imag)
			saveimage.append(imag)

	for im in saveimage:
		if download(im):
			imageNum = imageNum + 1

	print("the total images download %d pages" % imageNum)


openurl.py
#! /usr/bin/python 

import urllib2
from sgmllib import SGMLParser

class URL (SGMLParser):
	def reset(self):
		SGMLParser.reset(self)
		self.images = []	
	def start_img(self, temp):
		img = [v for k, v in temp]
		if img:
			self.images.extend(img)
		
	


python 网站自动抠图,布布扣,bubuko.com

python 网站自动抠图

上一篇:VC++6.0插件系列


下一篇:C++中的静态变量地址探讨