1、爬取精彩评论
from selenium import webdriver # 从selenium库中调用webdriver模块
import time
from selenium.webdriver.common.by import By

# Print the featured ("hot") comments of one QQ Music song page using Selenium only.

# Exclude the 'enable-automation' and 'enable-logging' Chrome switches
# (commonly done to hide the automation infobar and quiet console logging).
options = webdriver.ChromeOptions()
options.add_experimental_option("excludeSwitches", ['enable-automation', 'enable-logging'])

# Start exactly one browser. Creating a plain webdriver.Chrome() first and then
# rebinding `driver` would leave the first window open with no handle to close it.
driver = webdriver.Chrome(options=options)
try:
    driver.get('https://y.qq.com/n/yqq/song/000xdZuV2LcQ19.html')  # open the song page
    time.sleep(2)  # fixed pause, presumably so the comment area can finish loading

    # Several elements carry the class 'mod_hot_comment'; the featured comments
    # are in the second one, hence index 1.
    hot_section = driver.find_elements(By.CLASS_NAME, 'mod_hot_comment')[1]
    comments = hot_section.find_elements(By.CLASS_NAME, 'comment__list_item')
    print(len(comments))  # number of comments found

    for comment in comments:
        sweet = comment.find_element(By.TAG_NAME, 'p')  # the comment text lives in the first <p>
        print('评论:%s\n ---\n' % sweet.text)
finally:
    # Always end the session, even if a lookup above raised.
    driver.quit()
为什么提取元素时一定要加 [1]?查看网页元素可知,页面上 class 为 mod_hot_comment 的区块不止一个,精彩评论位于第二个位置(下标从 0 开始,所以取 [1])。
2、精彩评论点击更多加载评论
from selenium import webdriver # 从selenium库中调用webdriver模块
import time
from selenium.webdriver.common.by import By

# Print the comments of one QQ Music song page, clicking the "show more" link
# up to five times and printing only the comments that were not printed before.

driver = webdriver.Chrome()  # start the browser
try:
    driver.get('https://y.qq.com/n/yqq/song/000xdZuV2LcQ19.html')  # open the song page
    time.sleep(3)  # fixed pause, presumably so the comment area can finish loading

    printed = 0  # how many comments have already been printed
    for _ in range(5):
        # XPath approach taken from: https://mp.weixin.qq.com/s/SgUHuOvz0QArf-bbjUzjJw
        comment_list = driver.find_element(By.XPATH, '//*[@id="comment_box"]/div[4]/ul')
        comments = comment_list.find_elements(By.CLASS_NAME, 'c_b_normal')
        time.sleep(1)

        # Only print what was added since the previous round.
        for item in comments[printed:]:
            print(item.find_element(By.TAG_NAME, 'p').text)
        # Remember the real count instead of assuming a fixed batch of 15,
        # so nothing is skipped or repeated if a click loads a different number.
        printed = len(comments)
        time.sleep(1)

        # Stop early when the "show more" link is no longer on the page.
        links = driver.find_elements(By.CLASS_NAME, 'comment__show_all_link')
        if not links:
            break
        # Click through JavaScript rather than WebElement.click().
        driver.execute_script("arguments[0].click();", links[0])
        time.sleep(3)  # fixed pause for the next batch to load
finally:
    # Always end the session, even if a lookup above raised.
    driver.quit()
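这里修改了点击元素的方式:原来的写法是 driver.find_element_by_class_name('comment__show_all_link').click(),现在改为先定位到该元素,再通过 driver.execute_script("arguments[0].click();", element) 用 JavaScript 触发点击。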
参考链接:https://blog.csdn.net/please_fix_/article/details/104949016
3、selenium与BeautifulSoup配合使用爬取评论
只获取一个界面:
from selenium import webdriver # 从selenium库中调用webdriver模块
import time
from bs4 import BeautifulSoup
# Render one QQ Music song page with Selenium, then extract the featured
# ("hot") comments from the HTML snapshot with BeautifulSoup.

driver = webdriver.Chrome()
try:
    driver.get('https://y.qq.com/n/yqq/song/000xdZuV2LcQ19.html')  # open the song page
    time.sleep(2)  # fixed pause, presumably so the comment area can finish loading
    pageSource = driver.page_source  # HTML of the page as currently rendered
finally:
    # Parsing below works on the snapshot only, so the browser can be released
    # right away, and is released even if loading the page failed.
    driver.quit()

soup = BeautifulSoup(pageSource, 'html.parser')
# Several elements carry the class 'mod_hot_comment'; the featured comments
# are in the second one, hence index 1.
hot_section = soup.find_all(class_='mod_hot_comment')[1]
comments = hot_section.find_all(class_='comment__list_item')
print(len(comments))  # number of comments found

for comment in comments:
    sweet = comment.find('p')  # the comment text lives in the first <p>
    print('评论:%s\n ---\n' % sweet.text)
点击爬取更多:
from selenium import webdriver # 从selenium库中调用webdriver模块
import time
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By

# Render one QQ Music song page with Selenium, click the "show more" link up
# to five times, then extract the comments from the final HTML snapshot with
# BeautifulSoup.

driver = webdriver.Edge()
try:
    driver.get('https://y.qq.com/n/yqq/song/000xdZuV2LcQ19.html')  # open the song page
    time.sleep(2)  # fixed pause, presumably so the comment area can finish loading

    for _ in range(5):
        # Stop early when the "show more" link is no longer on the page.
        links = driver.find_elements(By.CLASS_NAME, 'comment__show_all_link')
        if not links:
            break
        # Click through JavaScript rather than WebElement.click().
        driver.execute_script("arguments[0].click();", links[0])
        time.sleep(2)  # fixed pause for the next batch to load

    pageSource = driver.page_source  # HTML after all clicks
finally:
    # Parsing below works on the snapshot only, so the browser can be released
    # right away, and is released even if a step above raised.
    driver.quit()

soup = BeautifulSoup(pageSource, 'html.parser')
# Several elements carry the class 'mod_hot_comment'; the second one (index 1)
# is the section read here.
comment_section = soup.find_all(class_='mod_hot_comment')[1]
comments = comment_section.find_all(class_='comment__list_item')
print(len(comments))  # number of comments found

for item in comments:
    print(item.find('p').text)  # the comment text lives in the first <p>