# The code is as follows:
# -*- coding: utf-8 -*-
import requests
import re
from bs4 import BeautifulSoup
from xlwt import Workbook
import urllib.request
# Fetch the page at the given URL and return the page content.
def getHtmlText(url):
    """Download *url* and return the response body as text.

    The response encoding is replaced by ``apparent_encoding`` (the encoding
    detected from the body) before the text is decoded.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded page text, or an empty string when the request fails
        (connection error, timeout, invalid URL, or an HTTP error status).
    """
    try:
        r = requests.get(url, timeout=30)
        # raise_for_status must be *called*; a bare attribute reference is a
        # no-op and lets 4xx/5xx error pages through as if they were valid.
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Best-effort fetch: report any request failure as "no content"
        # without also swallowing KeyboardInterrupt / SystemExit.
        return ""
def fillPhoneList(infoDict, html):
    """Extract the comment-count text from a search-result page.

    The lookup descends ``div.goods-list-v2 gl-type-3 J-goods-list`` ->
    ``ul.gl-warp clearfix`` -> the first ``li.gl-item``, then scans every
    ``div.gl-i-wrap`` inside that item for the ``<a>`` nested in
    ``div.p-commit > strong``.

    Args:
        infoDict: Accepted for interface compatibility; it is neither read
            nor modified here.
        html: HTML source of the page (may be an empty string).

    Returns:
        The text of the last matching ``<a>`` element. When the expected
        markup is absent (for example because the download failed and
        *html* is empty) the empty ``{}`` placeholder is returned instead
        of raising ``AttributeError``.
    """
    m = {}  # placeholder result kept from the original for "nothing found"
    node = BeautifulSoup(html, "html.parser")

    # Walk down the container chain; find() returns None when a level is
    # missing, so stop early rather than dereferencing None.
    for tag, css_class in (
        ("div", "goods-list-v2 gl-type-3 J-goods-list"),
        ("ul", "gl-warp clearfix"),
        ("li", "gl-item"),
    ):
        node = node.find(tag, attrs={"class": css_class})
        if node is None:
            return m

    for wrap in node.find_all("div", attrs={"class": "gl-i-wrap"}):
        commit = wrap.find("div", attrs={"class": "p-commit"})
        strong = commit.strong if commit is not None else None
        link = strong.find("a") if strong is not None else None
        if link is not None:
            # Later matches overwrite earlier ones, as in the original loop.
            m = link.text
    return m
def main1():
    """Fetch one JD search-result page and print the extracted value.

    The hard-coded URL queries search.jd.com with the keyword "手机自营"
    and the brand filter "华为(HUAWEI)". The page text is passed to
    fillPhoneList and whatever that function returns is printed.
    """
    Uinfo = {}
    url = "https://search.jd.com/search?keyword=手机自营&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&ev=exbrand_华为(HUAWEI)^&uc=0#J_searchWrap"
    html = getHtmlText(url)
    m = fillPhoneList(Uinfo, html)
    print(m)