实现抓图的工具
#encoding:UTF-8
import urllib
import urllib2
import re
import os
from BeautifulSoup import BeautifulSoup
def GetUrlContent(url,path):
    """Crawl a listing page and download the images of every page it links to.

    Fetches ``url``, collects every ``<a target="_blank">`` element that has
    no ``title`` attribute, prints each link target and hands it to ``GetUrl``
    so the matching images are saved under ``path``.

    Args:
        url: Address of the listing page to scan.
        path: Local directory passed on to ``GetUrl`` as the download target.

    Errors raised while fetching or parsing the listing page itself are not
    caught here and propagate to the caller.
    """
    # Function-scope import: the module header does not import urlparse.
    import urlparse

    # Same 5-second timeout GetUrl uses, so a stalled server cannot hang the crawl.
    response = urllib2.urlopen(url, None, 5)
    try:
        content = response.read()
    finally:
        # Release the connection even when read() fails.
        response.close()

    soup = BeautifulSoup(content)
    # Find all links that open in a new window and carry no title attribute.
    anchors = soup.findAll('a', attrs={"target": "_blank"}, title=None)
    for anchor in anchors:
        href = anchor.get('href')
        if not href:
            # An anchor without href would otherwise raise KeyError and
            # abort the whole crawl.
            continue
        # Resolve relative links against the listing page address.
        surl = urlparse.urljoin(url, href)
        print(surl)
        GetUrl(surl, path)
        # Blank separator lines after each processed page.
        # NOTE(review): the original indentation was lost; the separators are
        # assumed to belong to the loop body - confirm.
        print(" ")
        print(" ")
def createFileWithFileName(localPathParam,fileName):
    """Make sure a file exists inside a directory and return its full path.

    The path is built with ``os.path.join`` so the correct separator is used
    on every platform. The directory is created when it is missing. An
    existing file is left untouched; otherwise an empty file is created.

    Args:
        localPathParam: Directory that should contain the file.
        fileName: Name of the file inside that directory.

    Returns:
        The joined path ``localPathParam``/``fileName``.
    """
    totalPath = os.path.join(localPathParam, fileName)
    # Create the target directory first so open() below cannot fail on it.
    if localPathParam and not os.path.isdir(localPathParam):
        os.makedirs(localPathParam)
    if not os.path.exists(totalPath):
        # Append mode creates the file without ever truncating content.
        with open(totalPath, 'a+'):
            pass
    return totalPath
def GetFileName(url):
    """Return the file-name component of ``url``.

    Any fragment (``#...``) and query string (``?...``) are removed before
    the last path segment is taken, so ``.../a.jpg?v=2`` yields ``a.jpg``
    instead of a name containing characters that are unsuitable for files.

    Args:
        url: URL or path whose final segment is wanted.

    Returns:
        The last path segment; an empty string when ``url`` ends with ``/``
        or is empty.
    """
    # Drop the fragment first, then the query, leaving only the path part.
    pathPart = url.split('#', 1)[0].split('?', 1)[0]
    return os.path.basename(pathPart)
def GetUrl(myUrl,localPath):
    """Download every matching image referenced by one page.

    Fetches ``myUrl`` (5 second timeout), finds all ``<img>`` tags whose
    ``src`` matches the ``uploads/allimg`` pattern, and saves each image into
    ``localPath`` under the name derived by ``GetFileName``.

    The function is best-effort: failures are printed, never raised. A page
    that cannot be fetched or parsed is reported and skipped; a single image
    that fails to download is reported and the remaining images are still
    processed.

    Args:
        myUrl: Address of the page to scan for images.
        localPath: Local directory the images are written to.
    """
    # Function-scope import: the module header does not import urlparse.
    import urlparse

    try:
        response = urllib2.urlopen(myUrl, None, 5)
        try:
            content = response.read()
        finally:
            # Release the connection even when read() fails.
            response.close()
        soup = BeautifulSoup(content)
        images = soup.findAll("img", attrs={"src": re.compile("(.*)uploads/allimg(.*)")})
    except Exception as e:
        # Report which page failed and why instead of a bare "Error".
        print("Error: %s (%r)" % (myUrl, e))
        return

    for image in images:
        # Resolve relative src values against the page address.
        imgUrl = urlparse.urljoin(myUrl, image["src"])
        print(imgUrl)
        fileName = GetFileName(imgUrl)
        print(fileName)
        try:
            urllib.urlretrieve(imgUrl, createFileWithFileName(localPath, fileName))
        except Exception as e:
            # One broken image must not abort the remaining downloads.
            print("Error: %s (%r)" % (imgUrl, e))
if __name__ == '__main__':
    # Script entry point: print the file name derived from a sample image
    # URL. No page is crawled and nothing is downloaded here.
    sampleUrl = "http://www.2cto.com/meinv/sexmv/1810.jpg"
    print(GetFileName(sampleUrl))
如果您觉得阅读本文对您有帮助，请点一下“推荐”按钮，您的“推荐”将是我最大的写作动力！欢迎各位转载，但是未经作者本人同意，转载文章之后必须在文章页面明显位置给出作者和原文链接，否则保留追究法律责任的权利。 原文链接：https://yq.aliyun.com/articles/504382