Python爬虫--淘宝“泸州老窖”

爬虫淘宝--“泸州老窖”

爬取淘宝“泸州老窖”相关信息:

import requests
import re
import json
import pandas as pd ## cookie
# Raw "Cookie" header copied from a logged-in browser session (placeholder).
usercookie = 'miid.......'

# Number of items per search result page; used to compute the 's' offset.
_ITEMS_PER_PAGE = 44

# Fields copied from every search result item, in output column order.
_FIELDS = ('nid', 'title', 'detail_url', 'view_price', 'view_sales', 'nick')


def cookie_get(usercookie):
    """Convert a raw cookie header string into a dict.

    :param usercookie: cookie string of the form ``"name=value; name2=value2"``.
    :return: dict mapping each cookie name to its value. Segments that do not
        contain ``=`` (empty segments left by a trailing ``;``, or a
        placeholder string) are skipped instead of raising ``ValueError``.
    """
    cookies = {}
    for segment in usercookie.split(";"):
        # Split on the first "=" only: cookie values may themselves contain "=".
        name, sep, value = segment.strip().partition("=")
        if not sep:
            continue
        cookies[name] = value
    return cookies


def open_url(keyword, page, timeout=10):
    """Request one page of search results for ``keyword``.

    :param keyword: search keyword, sent as the ``q`` query parameter.
    :param page: 1-based page number; converted to the item offset ``s``.
    :param timeout: seconds to wait for the server before giving up, so a
        stalled connection cannot hang the script indefinitely.
    :return: the ``requests`` response object.
    """
    keyload = {
        'q': keyword,
        's': str((page - 1) * _ITEMS_PER_PAGE),
        'sort': 'sale-desc',  # presumably "sales, descending" -- confirm
    }
    url = "https://s.taobao.com/search"
    cookies = cookie_get(usercookie)
    return requests.get(url, params=keyload, cookies=cookies, timeout=timeout)


def get_page_items(path="items.txt"):
    """Extract the list of result items from a saved search page.

    :param path: file holding the page text saved by ``main``.
    :return: the value at ``mods.itemlist.data.auctions`` inside the JSON
        object assigned to ``g_page_config`` in the page.
    :raises ValueError: if the page contains no ``g_page_config`` assignment.
    """
    with open(path, "r", encoding="utf-8") as file1:
        page_text = file1.read()
    g_page_config = re.search(r"g_page_config = (.*?);\n", page_text)
    if g_page_config is None:
        # Fail with a clear message rather than AttributeError on None.
        raise ValueError(
            "g_page_config not found in %r; the response may not be a "
            "search result page (check the cookie)" % path
        )
    page_config_json = json.loads(g_page_config.group(1))
    return page_config_json['mods']['itemlist']['data']['auctions']


def get_reslut(page_items, results):
    """Append one record per item to ``results`` and return it.

    :param page_items: iterable of item dicts as returned by
        ``get_page_items``.
    :param results: list extended in place with one dict per item, holding
        the keys in ``_FIELDS``.
    :return: the same ``results`` list.

    A field missing from an item is stored as ``None`` rather than raising
    ``KeyError``; ``results_to_excel`` later replaces missing values.
    """
    for item in page_items:
        results.append({field: item.get(field) for field in _FIELDS})
    return results


def results_to_excel(results):
    """Write the collected records to ``泸州老窖.xlsx``.

    :param results: iterable of dicts keyed by the names in ``_FIELDS``.

    Columns are written in ``_FIELDS`` order without the index. Missing
    values are replaced by a single space. An empty ``results`` produces a
    sheet with only the header row.
    """
    pf = pd.DataFrame(list(results), columns=list(_FIELDS))
    pf = pf.fillna(' ')
    # The context manager saves and closes the workbook; ExcelWriter.save()
    # and the to_excel ``encoding`` argument no longer exist in pandas 2.x.
    with pd.ExcelWriter('泸州老窖.xlsx') as writer:
        pf.to_excel(writer, index=False)


def main():
    """Fetch the first three result pages and export them to Excel."""
    keyword = '泸州老窖'
    length = 4  # pages 1..3 are fetched, 44 items per page
    results = []
    for each in range(1, length):
        res = open_url(keyword, each)
        # The page is saved to disk first; get_page_items() reads it back.
        with open("items.txt", 'w', encoding="utf-8") as file:
            file.write(res.text)
        page_items = get_page_items()
        results = get_reslut(page_items, results)
    results_to_excel(results)


if __name__ == "__main__":
    main()
上一篇:postgresql crosstab 行转列函数


下一篇:loadrunner调用jar包方法