实现抓图的工具

实现抓图的工具


#encoding:UTF-8

import  urllib
import urllib2
import re
import os
from BeautifulSoup import BeautifulSoup

def GetUrlContent(url,path):
    """Crawl a listing page and download the images of every linked page.

    Fetches ``url``, collects each ``<a target="_blank">`` element that has
    no ``title`` attribute, and hands its ``href`` to ``GetUrl`` together
    with ``path``.

    Args:
        url: Address of the listing page to scan.
        path: Local directory passed through to ``GetUrl``.

    Errors raised while fetching or reading the listing page are not caught
    here and propagate to the caller.
    """
    # Same 5-second timeout GetUrl uses, so a stalled server cannot hang
    # the crawl indefinitely.
    req = urllib2.urlopen(url, None, 5)
    try:
        content = req.read()
    finally:
        # Release the connection even if the read fails.
        req.close()
    soup = BeautifulSoup(content)
    # Find all links that target a new window and carry no title attribute.
    alinks = soup.findAll('a', attrs={"target": "_blank"}, title=None)
    for a in alinks:
        # An anchor without an href has nothing to follow; skip it instead
        # of letting a KeyError abort the remaining links.
        surl = a.get('href')
        if not surl:
            continue
        print(surl)
        GetUrl(surl, path)
        # Two blank-ish lines visually separate the output of each page.
        print(" ")
        print(" ")

def createFileWithFileName(localPathParam,fileName):
    """Make sure a file exists inside a directory and return its full path.

    Args:
        localPathParam: Directory that should contain the file.
        fileName: Name of the file inside that directory.

    Returns:
        The joined path, both when the file was just created and when it
        already existed.

    Raises:
        IOError/OSError: If the file cannot be created (for example when
            the directory does not exist).
    """
    # os.path.join uses the platform separator instead of a hard-coded
    # backslash, so the path is also valid outside Windows.
    totalPath = os.path.join(localPathParam, fileName)
    if not os.path.exists(totalPath):
        # Opening in append mode creates the file without writing to it.
        with open(totalPath, 'a+'):
            pass
    # Return unconditionally: previously the return sat inside the `if`,
    # so callers received None whenever the file already existed.
    return totalPath

def GetFileName(url):
    """Return the file-name component of a URL.

    Any fragment (``#...``) and query string (``?...``) are removed first,
    so ``http://host/a/b.jpg?w=100`` yields ``b.jpg`` rather than
    ``b.jpg?w=100``, which would not be a usable file name on Windows.

    Args:
        url: The URL (or path) to extract the name from.

    Returns:
        The last path segment; an empty string if the path ends with a
        separator or ``url`` is empty.
    """
    # Drop the fragment first, then the query, leaving only the path part.
    pathPart = url.partition('#')[0].partition('?')[0]
    return os.path.basename(pathPart)

def GetUrl(myUrl,localPath):
    """Download every matching image found on one page.

    Fetches ``myUrl`` with a 5-second timeout, selects the ``<img>``
    elements whose ``src`` matches ``uploads/allimg``, and saves each one
    under ``localPath`` using the name derived by ``GetFileName``.

    Failures are reported on stdout and never raised: a page that cannot
    be fetched or parsed is skipped, and an image that cannot be
    downloaded does not prevent the remaining images from being tried.

    Args:
        myUrl: Address of the page to scan for images.
        localPath: Directory in which the downloaded images are stored.
    """
    try:
        req = urllib2.urlopen(myUrl, None, 5)
        try:
            content = req.read()
        finally:
            # Release the connection even if the read fails.
            req.close()
        soup = BeautifulSoup(content)
        images = soup.findAll("img", attrs={"src": re.compile("(.*)uploads/allimg(.*)")})
    except Exception as e:
        # Best-effort crawl: report why the page was skipped and move on.
        # %r is used because str() on an exception can itself fail for
        # non-ASCII messages under Python 2.
        print("Error: %r" % (e,))
        return

    for d in images:
        imgUrl = d["src"]
        print(imgUrl)
        fileName = GetFileName(imgUrl)
        print(fileName)
        # Guard each download separately so one broken image does not
        # abort the rest of the page.
        try:
            urllib.urlretrieve(imgUrl, createFileWithFileName(localPath, fileName))
        except Exception as e:
            print("Error: %r" % (e,))

if __name__=='__main__':
    # Smoke check when run as a script: show the file name that
    # GetFileName extracts from a sample image URL.
    sampleName = GetFileName("http://www.2cto.com/meinv/sexmv/1810.jpg")
    print(sampleName)


如果您觉得阅读本文对您有帮助,请点一下“推荐”按钮,您的“推荐”将是我最大的写作动力!欢迎各位转载,但是未经作者本人同意,转载文章之后必须在文章页面明显位置给出作者和原文链接,否则保留追究法律责任的权利。
原文链接:https://yq.aliyun.com/articles/504382
上一篇:Android Studio ARouter插件


下一篇:基于业务场景钉钉机器人做业务逻辑开发