网站:http://www.sci99.com/targetprice/
网站通过点击日历来切换内容,使用Ajax。
通过POST请求获取指定日期对应的内容。刚开始只构造了selecttime参数,结果返回的仍然是默认日期的内容,
所以还需要一并提交页面中的三个隐藏字段参数:__VIEWSTATE、__VIEWSTATEGENERATOR、__EVENTVALIDATION。
先使用getHiddenvalue()获取三个参数,再放到formData里即可
def getHiddenvalue():
    """Download the page at ``URL`` and extract its hidden form-state fields.

    The values are read from the ``<input type="hidden" ...>`` elements named
    ``__VIEWSTATE``, ``__VIEWSTATEGENERATOR`` and ``__EVENTVALIDATION``. They
    have to be echoed back in the POST that selects a date.

    Returns:
        A 3-tuple of strings ``(viewstate, viewstategenerator,
        eventvalidation)``, in that order.

    Raises:
        ValueError: if one of the hidden inputs cannot be found in the page
            (for example because the markup changed or an error page was
            served).
        urllib.error.URLError: if the page cannot be fetched.
    """
    request = urllib.request.Request(URL, headers=header)
    # The context manager closes the connection even if read/decode fails.
    with urllib.request.urlopen(request) as response:
        page = response.read().decode('utf-8')

    # The pattern is intentionally strict about attribute order and spacing;
    # it mirrors the markup the page is expected to emit for these inputs.
    field_pattern = r'<input type="hidden" name="{0}" id="{0}" value="(.*?)" />'
    values = []
    for name in ("__VIEWSTATE", "__VIEWSTATEGENERATOR", "__EVENTVALIDATION"):
        match = re.search(field_pattern.format(name), page)
        if match is None:
            # Fail with the field name instead of an opaque
            # "'NoneType' object has no attribute 'group'".
            raise ValueError(
                "hidden form field {0!r} not found in page".format(name)
            )
        values.append(match.group(1))
    return tuple(values)
获取网页函数:
def getPage(date):
    """Request the content for ``date`` and return the response body as text.

    A fresh set of hidden form-state values is fetched with
    ``getHiddenvalue()`` on every call and posted back together with the
    selected date, emulating the page's asynchronous "search" postback.

    Args:
        date: value sent as the ``selecttime`` form field. The expected
            date format is not visible in this file -- TODO confirm against
            the site's calendar widget.

    Returns:
        The response body decoded as UTF-8 text.
    """
    vs, vsg, ev = getHiddenvalue()
    formData = {
        "ScriptManager1": "UpdatePanel1|btn_search",
        "selecttime": date,
        "__VIEWSTATE": vs,
        "__VIEWSTATEGENERATOR": vsg,
        "__EVENTVALIDATION": ev,
        "__ASYNCPOST": "true",
        "btn_search": "",
    }
    # Pass the mapping itself rather than a pre-encoded string: requests then
    # form-encodes it and declares the form content type on its own (unless
    # `header` already provides one), so the server can parse the fields.
    response = requests.post(URL, data=formData, headers=header)
    # Force UTF-8 instead of relying on requests' charset detection.
    response.encoding = "UTF-8"
    return response.text