selenium爬取精彩评论(目前解决方案)

1、爬取精彩评论

from selenium import  webdriver # 从selenium库中调用webdriver模块
import time

# Scrape the featured comments of one QQ Music song page with Selenium.
#
# Build the options first and start Chrome exactly once; starting a driver
# before the options exist would leave an extra browser window (and its
# chromedriver process) running that nothing ever closes.
options = webdriver.ChromeOptions()
# Exclude the 'enable-automation' and 'enable-logging' switches that
# chromedriver would otherwise pass to Chrome.
options.add_experimental_option("excludeSwitches", ['enable-automation', 'enable-logging'])
driver = webdriver.Chrome(options=options)

try:
    driver.get('https://y.qq.com/n/yqq/song/000xdZuV2LcQ19.html')  # open the song page
    time.sleep(2)  # fixed wait so the comment area can render

    # Several elements carry the 'mod_hot_comment' class; per the article's
    # note, the featured comments are the second one (index 1).
    hot_sections = driver.find_elements_by_class_name('mod_hot_comment')
    comments = hot_sections[1].find_elements_by_class_name('comment__list_item')

    print(len(comments))  # number of comments found
    for comment in comments:
        sweet = comment.find_element_by_tag_name('p')  # the <p> holds the comment text
        print('评论:%s\n ---\n' % sweet.text)
finally:
    # quit() ends the whole session (all windows plus the driver process),
    # and runs even when a lookup above raises.
    driver.quit()

为什么提取元素时一定要加 [1]?查看网页元素可知,页面中 class 为 mod_hot_comment 的元素不止一个,精彩评论位于第二个位置(索引为 1)。
selenium爬取精彩评论(目前解决方案)

2、精彩评论点击更多加载评论

from selenium import  webdriver # 从selenium库中调用webdriver模块
import time

# Print the comments of a song page, clicking the "show more" link up to five
# times and printing only the comments that appeared since the last pass.
driver = webdriver.Chrome()  # start the browser
try:
    driver.get('https://y.qq.com/n/yqq/song/000xdZuV2LcQ19.html')  # open the song page
    time.sleep(3)  # fixed wait for the initial page render

    printed = 0  # how many comments have already been printed
    for _ in range(5):
        # XPath approach taken from: https://mp.weixin.qq.com/s/SgUHuOvz0QArf-bbjUzjJw
        comments = driver.find_element_by_xpath('//*[@id="comment_box"]/div[4]/ul').find_elements_by_class_name('c_b_normal')
        time.sleep(1)
        for item in comments[printed:]:
            print(item.find_element_by_tag_name('p').text)
        # Remember the real list length instead of assuming a fixed page size,
        # so nothing is skipped or repeated when a click loads a different
        # number of comments.
        printed = len(comments)
        time.sleep(1)

        # find_elements returns an empty list (rather than raising) when the
        # link is absent, which lets the loop end cleanly.
        more_links = driver.find_elements_by_class_name('comment__show_all_link')
        if not more_links:
            break
        # Click through JavaScript instead of WebElement.click().
        driver.execute_script("arguments[0].click();", more_links[0])
        time.sleep(3)  # wait for the next batch to load
finally:
    # Always end the browser session, even if a lookup above raises.
    driver.quit()

点击元素的写法在这里做了修改:不再直接调用 driver.find_element_by_class_name('comment__show_all_link').click(),而是用 execute_script 执行 JavaScript 来点击(见上方代码)。

参考链接:https://blog.csdn.net/please_fix_/article/details/104949016

3、selenium与BeautifulSoup配合使用爬取评论

只获取一个界面:

from selenium import  webdriver # 从selenium库中调用webdriver模块
import time
from bs4 import BeautifulSoup

# Load the song page with Selenium, then parse the rendered HTML with
# BeautifulSoup and print the featured comments of that single page.
driver = webdriver.Chrome()
try:
    driver.get('https://y.qq.com/n/yqq/song/000xdZuV2LcQ19.html')  # open the song page
    time.sleep(2)  # fixed wait so the comment area can render

    # Snapshot of the rendered DOM; from here on only BeautifulSoup is used.
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    # The second 'mod_hot_comment' element (index 1) is the one the article
    # identifies as the featured-comments section.
    hot_sections = soup.find_all(class_='mod_hot_comment')
    comments = hot_sections[1].find_all(class_='comment__list_item')

    print(len(comments))  # number of comments found
    for comment in comments:
        sweet = comment.find('p')  # the <p> holds the comment text
        print('评论:%s\n ---\n' % sweet.text)
finally:
    # End the browser session even when the lookup above fails
    # (e.g. IndexError if fewer than two sections were rendered).
    driver.quit()

点击爬取更多:

from selenium import  webdriver # 从selenium库中调用webdriver模块
import time
from bs4 import BeautifulSoup

# Click the "show more" link up to five times with Selenium, then parse the
# fully expanded page once with BeautifulSoup and print every featured comment.
driver = webdriver.Edge()
try:
    driver.get('https://y.qq.com/n/yqq/song/000xdZuV2LcQ19.html')  # open the song page
    time.sleep(2)  # fixed wait so the comment area can render

    for _ in range(5):
        # find_elements returns an empty list (rather than raising) when the
        # link is absent, so the loop stops once there is nothing to expand.
        more_links = driver.find_elements_by_class_name('comment__show_all_link')
        if not more_links:
            break
        # Click through JavaScript instead of WebElement.click().
        driver.execute_script("arguments[0].click();", more_links[0])
        time.sleep(2)  # wait for the next batch to load

    # Snapshot of the expanded DOM; no further waiting is needed after this
    # because BeautifulSoup works on the captured HTML string.
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    # Index 1 selects the second 'mod_hot_comment' element, which the article
    # identifies as the featured-comments section.
    comments = soup.find_all(class_='mod_hot_comment')[1].find_all(class_='comment__list_item')

    print(len(comments))  # number of comments found
    for item in comments:
        print(item.find('p').text)
finally:
    # End the browser session even if a step above raises.
    driver.quit()
上一篇:MySQL生成百万条数据超详细步骤


下一篇:Spring Boot缓存管理