import os
import random

import pymysql
from selenium import webdriver
# Shared MySQL connection and cursor used by the spider functions below.
# Every setting can be overridden through an environment variable so that
# credentials do not have to live in source control; when a variable is not
# set, the previously hard-coded value is used, so behaviour is unchanged.
# NOTE(review): the fallback password is still committed here -- rotate it and
# drop the fallbacks once the environment variables are configured.
conn = pymysql.connect(
    host=os.environ.get('JD_DB_HOST',
                        'rm-bp188t3rvelh6g02huo.mysql.rds.aliyuncs.com'),
    port=int(os.environ.get('JD_DB_PORT', '3306')),
    user=os.environ.get('JD_DB_USER', "aabb_552200"),
    passwd=os.environ.get('JD_DB_PASSWORD', 'Abc@1234'),
    db=os.environ.get('JD_DB_NAME', "jindong"),
    charset='utf8',
    use_unicode=True,
)
cursor = conn.cursor()
def spider1(keyword, id):
    """Scrape one JD mobile product page through a proxy and store the result.

    Opens ``https://item.m.jd.com/product/<keyword>.html`` in headless Chrome
    routed through the HTTP proxy ``id``, reads the product name and price,
    and inserts one ``(name, price, link)`` row into the ``jindong`` table
    through the module-level ``cursor`` / ``conn``.

    Args:
        keyword: Product identifier substituted into the product URL.
        id: Proxy address appended to ``http://`` for Chrome's
            ``--proxy-server`` switch (presumably ``host:port`` -- confirm
            against the proxy list).  The name shadows the builtin but is
            kept because callers pass it by keyword.

    Returns:
        None.  If the page cannot be loaded nothing is inserted.  A missing
        name or price element is stored as an empty string.
    """
    # Imported locally so this function does not depend on the file header.
    from selenium.common.exceptions import WebDriverException

    url1 = "https://item.m.jd.com/product/{}.html".format(keyword)
    option = webdriver.ChromeOptions()
    option.add_argument('--proxy-server=http://{}'.format(id))
    option.add_argument("headless")
    print(option)
    # NOTE(review): `chrome_options=` and `find_element_by_id` are legacy
    # Selenium spellings; they are kept because the installed Selenium
    # version is not visible from this file.
    driver = webdriver.Chrome(chrome_options=option)
    try:
        try:
            driver.get(url1)
        except WebDriverException as exc:
            # The original code quit the driver here and then kept using it,
            # which always failed on the next call; stop instead.
            print("failed to load {}: {}".format(url1, exc))
            return
        driver.maximize_window()

        try:
            name = driver.find_element_by_id('itemName').text.replace(' ', '/')
        except WebDriverException:
            name = ""
        try:
            price = driver.find_element_by_id('priceSale').text
        except WebDriverException:
            price = ""

        row = [name, price, url1]
        print([row])
        sql = "INSERT INTO jindong (商品名,价格,商品链接) VALUES (%s,%s,%s)"
        cursor.execute(sql, row)
        conn.commit()
        # The shared cursor/connection are deliberately left open: the script
        # entry point closes them once, and closing them here made every
        # later spider call (and that final close) fail.
    finally:
        # Always release the browser, even when scraping or the insert fails.
        driver.quit()
def spider(keyword):
    """Scrape one JD mobile product page and store the result in MySQL.

    Opens ``https://item.m.jd.com/product/<keyword>.html`` in headless
    Chrome, reads the product name and price, and inserts one
    ``(name, price, link)`` row into the ``jindong`` table through the
    module-level ``cursor`` / ``conn``.

    Args:
        keyword: Product identifier substituted into the product URL.

    Returns:
        None.  If the page cannot be loaded nothing is inserted.  A missing
        name or price element is stored as an empty string.
    """
    # Imported locally so this function does not depend on the file header.
    from selenium.common.exceptions import WebDriverException

    url1 = "https://item.m.jd.com/product/{}.html".format(keyword)
    option = webdriver.ChromeOptions()
    option.add_argument("headless")
    # NOTE(review): `chrome_options=` and `find_element_by_id` are legacy
    # Selenium spellings; they are kept because the installed Selenium
    # version is not visible from this file.
    driver = webdriver.Chrome(chrome_options=option)
    try:
        try:
            driver.get(url1)
        except WebDriverException as exc:
            # The original code quit the driver here and then kept using it,
            # which always failed on the next call; stop instead.
            print("failed to load {}: {}".format(url1, exc))
            return
        driver.maximize_window()

        try:
            name = driver.find_element_by_id('itemName').text.replace(' ', '/')
        except WebDriverException:
            name = ""
        try:
            price = driver.find_element_by_id('priceSale').text
        except WebDriverException:
            price = ""

        row = [name, price, url1]
        print([row])
        sql = "INSERT INTO jindong (商品名,价格,商品链接) VALUES (%s,%s,%s)"
        cursor.execute(sql, row)
        conn.commit()
    finally:
        # Always release the browser, even when scraping or the insert fails.
        driver.quit()
# JD.com (Jingdong) crawler: crawl the product ids listed in a.txt and save
# each product's name, price and link.
if __name__ == '__main__':
    with open("a.txt", 'r', encoding="utf-8") as f:
        # One product id per line.  Blank lines (including the empty entry a
        # trailing newline used to produce) are skipped so that no bogus
        # product URL is requested and no empty row is inserted.
        b = [line.strip() for line in f if line.strip()]
    print(b)
    # A proxy-based variant (spider1 with a random proxy read from b.txt) was
    # previously tried for the first id; it is disabled, so every id goes
    # through spider().
    try:
        for keyword in b:
            spider(keyword=keyword)
            print("保存数据库成功")
    finally:
        # Close the shared cursor/connection exactly once, even if a crawl
        # raised part-way through the list.
        cursor.close()
        conn.close()