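# Entirely original work; verified working as of 2021-01-29. The results are written straight into the database - the table design can be read off the code below. If this helped you, please leave a like, thanks.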
from selenium import webdriver
from time import sleep
import xlwt # 进行excel操作
import pandas as pd
from sqlalchemy import create_engine
# Scrape hotel cards from a Ctrip (hotels.ctrip.com) search-result list with
# Selenium and append them to the MySQL table `cityhotel`.
#
# Flow: open the search URL in Chrome -> for list items 4..13 read the name,
# price, score, review count and cover image -> build one DataFrame ->
# DataFrame.to_sql(..., if_exists='append').

# Search-result page to scrape. The `display` parameter URL-decodes to
# "东方, 海南, 中国"; the query string is kept exactly as the author used it.
SEARCH_URL = "https://hotels.ctrip.com/hotels/list?countryId=1&city=48&checkin=2021/02/22&checkout=2021/02/23&optionId=48&optionType=City&directSearch=0&display=%E4%B8%9C%E6%96%B9%2C%20%E6%B5%B7%E5%8D%97%2C%20%E4%B8%AD%E5%9B%BD&crn=1&adult=1&children=0&searchBoxArg=t&travelPurpose=0&ctm_ref=ix_sb_dl&domestic=1&"
# Leftover from the original script; looks like the tail of an alternative,
# province-level query string (TODO confirm before reusing):
#19&optionId=31&optionType=Province&directSearch=0&display=%E6%B5%B7%E5%8D%97%2C%20%E4%B8%AD%E5%9B%BD&crn=1&adult=1&children=0&searchBoxArg=t&travelPurpose=0&ctm_ref=ix_sb_dl&domestic=1&

# Column order of the resulting DataFrame / database rows.
HOTEL_COLUMNS = ['hotel', 'price', 'address', 'score', 'number', 'image']

# The address is not scraped; every row gets this fixed value.
ADDRESS = "东方"

# XPath of the inner container of the N-th <li> in the result list; `{}` is
# the 1-based list position.
_CARD_XPATH = "//*[@id='ibu_hotel_container']/div/section/div[2]/ul/li[{}]/div/div/div"

# XPath of each scraped element, relative to _CARD_XPATH.
_FIELD_XPATHS = {
    'hotel': "/div[1]/div[2]/div[1]/div/span[1]",
    'price': "/div[2]/div[2]/div[1]/p/span",
    'score': "/div[2]/div[1]/div/div[2]/span",
    'number': "/div[2]/div[1]/div/div[1]/p[2]/a",
    'image': "/div[1]/div[1]/div/div/div",
}


def _image_url_from_style(style):
    """Build an https image URL from an element's inline ``style`` value.

    Strips any trailing/leading ``"``, ``)`` and ``;`` characters, then keeps
    whatever follows the last ``//`` and prefixes it with ``https://``.
    Presumably the style looks like ``background-image: url("//host/x.jpg");``
    -- TODO confirm against the live page.
    """
    return "https://" + style.strip('");').split("//")[-1]


def _scrape_card(browser, position):
    """Return one row (a dict keyed by HOTEL_COLUMNS) for list item *position*.

    ``browser`` is the Selenium WebDriver; ``position`` is the 1-based index
    of the ``<li>`` inside the result list. Element lookups go through
    ``find_element``, so a missing element raises whatever Selenium raises
    once the implicit wait has expired.
    """
    card = _CARD_XPATH.format(position)

    def locate(field):
        # find_element("xpath", ...) is the locator form supported by both
        # Selenium 3 and 4; find_element_by_xpath was removed in Selenium 4.3.
        return browser.find_element("xpath", card + _FIELD_XPATHS[field])

    def text_of(field):
        # textContent is read instead of .text so the value is returned even
        # when the element is not currently rendered/visible.
        return locate(field).get_attribute("textContent")

    # Address scraping was disabled in the original script. The element used
    # to be at card + "/div[1]/div[2]/div[2]/p/span[1]", taking the last
    # space-separated token of its textContent.
    return {
        'hotel': text_of('hotel').replace('\n', '').replace('\t', ''),
        'price': text_of('price'),
        'address': ADDRESS,
        'score': text_of('score'),
        'number': text_of('number'),
        'image': _image_url_from_style(locate('image').get_attribute("style")),
    }


driver = webdriver.Chrome()
try:
    # Open the result page.
    driver.get(SEARCH_URL)
    driver.maximize_window()
    # Original author's note: use this time to (1) close the login pop-up and
    # (2) keep scrolling down until the blue "see more" button appears.
    # NOTE(review): implicitly_wait() does not pause the script; it only makes
    # each find_element call poll for up to 30 s until its element exists.
    driver.implicitly_wait(30)

    rows = []
    # List items 4..13 are scraped; presumably the earlier <li> entries are
    # not regular hotel cards -- TODO confirm.
    for j in range(4, 14):
        rows.append(_scrape_card(driver, j))
        # Throttle between cards, as the original script did.
        sleep(3)
        # Disabled in the original: on every second card, click the
        # "see more" control at
        # //*[@id='ibu_hotel_container']/div/section/div[2]/ul/div[2]/div/span
finally:
    # Always shut the browser and chromedriver down, even if scraping fails.
    driver.quit()

# Build the frame once instead of DataFrame.append per row (quadratic, and
# removed in pandas 2.0). The default RangeIndex matches ignore_index=True.
newData = pd.DataFrame(rows, columns=HOTEL_COLUMNS)
data = newData

# NOTE(review): database credentials are hard-coded in the URL; consider
# loading them from configuration or the environment.
engine = create_engine('mysql+pymysql://root:root@localhost:3306/myblog')
# The DataFrame index is written too (to_sql default), as in the original.
data.to_sql('cityhotel', engine, if_exists='append')
print("爬取完毕!")