"""Crawl wallpaper categories on netbian.com with Selenium and download the images.

Flow: ``getFenglei`` collects category URLs, ``page`` walks the index pages of
each category, ``getData`` follows thumbnails to the full-size image and
records its URL and title, and ``downImg`` saves every recorded image to
``./all``.
"""
import os
import re

import requests
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

url1 = 'http://www.netbian.com'  # site to crawl
path = 'chromedriver.exe'  # path to the ChromeDriver executable

# Build the Chrome option set.
chrome_options = Options()
# Do not load images while browsing; only their URLs are needed.
prefs = {"profile.managed_default_content_settings.images": 2}
chrome_options.add_experimental_option("prefs", prefs)
# Launch the browser.
# NOTE(review): the positional driver path is the Selenium 3 calling
# convention; confirm against the installed Selenium version.
browser = webdriver.Chrome(path, options=chrome_options)

fengleis = []  # category page URLs collected by getFenglei()
data = []  # {'img_url': ..., 'title': ...} records collected by getData()


def getFenglei():
    """Open the home page and append category links to ``fengleis``.

    Reads the 2nd and 3rd anchors of the first header menu entry. Anchors
    without an ``href`` are skipped so later URL building never sees ``None``.
    """
    browser.get(url1)
    for i in range(2, 3 + 1):
        fenlei = browser.find_element(
            By.XPATH, '//*[@id="header"]/div[1]/ul/li[1]/div/a[%d]' % i
        ).get_attribute('href')
        print(fenlei)
        if fenlei:
            fengleis.append(fenlei)


def getData(url, count=2):
    """Collect full-size image URLs and titles from one index page.

    Args:
        url: Address of a category index page.
        count: How many thumbnails (1-based list items) to follow per page.

    For each thumbnail the browser goes index page -> detail page ->
    download page, then the image ``src`` and ``title`` are appended to
    ``data``. A thumbnail whose expected elements are missing is reported
    and skipped instead of aborting the whole crawl.
    """
    for i in range(1, count + 1):
        try:
            # Reload the index page: the previous iteration navigated away.
            browser.get(url)
            # Link from the thumbnail to its detail page.
            li_a = browser.find_element(
                By.XPATH, '//*[@id="main"]/div[3]/ul/li[%d]//a' % i
            ).get_attribute('href')
            # Skip items that link to the pic.netbian.com front page
            # (presumably a promotional tile, not a wallpaper) or have no link.
            if not li_a or li_a == 'http://pic.netbian.com/':
                continue
            print(li_a)
            browser.get(li_a)
            # Link from the detail page to the download page.
            download = browser.find_element(
                By.XPATH, '//*[@id="main"]/div[3]/div/div/a'
            ).get_attribute('href')
            print(download)
            browser.get(download)
            # The full-size image element on the download page.
            big_img = browser.find_element(
                By.XPATH, '//*[@id="endimg"]/tbody/tr/td/a/img'
            )
        except NoSuchElementException as e:
            print(e)
            continue
        img_url = big_img.get_attribute('src')
        title = big_img.get_attribute('title')
        print(img_url, title)
        data.append({'img_url': img_url, 'title': title})


def page(url, pages=9):
    """Run ``getData`` over the first ``pages`` index pages of a category.

    Args:
        url: Category base URL; page 1 is the URL itself.
        pages: Number of index pages to visit.

    Later pages follow the pattern
    ``http://www.netbian.com/feizhuliu/index_2.htm``. The base URL is
    normalized so a missing trailing slash does not produce a broken address.
    """
    base = url.rstrip('/')
    for i in range(1, pages + 1):
        if i == 1:
            getData(url)
        else:
            getData(base + '/index_%d.htm' % i)


def _safe_filename(title, img_url):
    """Return a file-system-safe base name from the title, else from the URL."""
    raw = title or os.path.splitext(os.path.basename(img_url))[0]
    # Replace characters that are invalid in file names on common platforms.
    name = re.sub(r'[\\/:*?"<>|]', '_', raw).strip()
    return name or 'untitled'


def downImg():
    """Download every image recorded in ``data`` into ``./all`` as ``<title>.jpg``.

    Failures (network errors, HTTP error statuses, file-system errors) are
    printed and the loop moves on to the next image. The raw response bytes
    are no longer printed to the console.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'
    }
    # The target directory must exist, otherwise every open() fails.
    os.makedirs('./all', exist_ok=True)
    for i in data:
        img_url = i.get('img_url')
        title = i.get('title')
        if not img_url:
            continue
        name = _safe_filename(title, img_url)
        try:
            # A timeout keeps one stalled server from hanging the whole run.
            resp = requests.get(img_url, headers=headers, timeout=30)
            # Do not save an HTTP error page as an image.
            resp.raise_for_status()
            with open('./all/%s.jpg' % name, 'wb') as f:
                f.write(resp.content)
            print('正在下载' + (title or name))
        except (requests.RequestException, OSError) as e:
            print(e)
            continue


def run():
    """Crawl every category, download the collected images, then close the browser."""
    try:
        # Collect the category URLs.
        getFenglei()
        # Crawl one category at a time.
        for url in fengleis:
            page(url)
        # Download the images.
        downImg()
    finally:
        # Always release the Chrome / chromedriver process, even on failure.
        browser.quit()


run()