1. 微信文章动态爬取的一个例子
# Scroll a WeChat article from top to bottom in small steps so that content
# loaded on scroll gets rendered, then save the resulting page source to b.html.
import time

from selenium import webdriver

# JavaScript snippet that returns the current total height of the page body.
# The value is returned directly instead of first being assigned to an
# implicit global variable inside the page.
js = "return document.body.scrollHeight"

driver = webdriver.Chrome()
try:
    driver.get("https://mp.weixin.qq.com/s/FCsJMGlWvwfR18YtLSLKtQ")
    time.sleep(1)

    # Scroll offset that has already been covered; start at the top (0).
    height = 0
    # Total page height as currently reported by the browser.
    new_height = driver.execute_script(js)

    # Repeat until a pass over the page no longer increases its height.
    while height < new_height:
        # Move the scroll bar towards the bottom in 100-pixel steps, pausing
        # between steps so the page has time to load more content.
        for i in range(height, new_height, 100):
            driver.execute_script('window.scrollTo(0, {})'.format(i))
            time.sleep(0.5)
        height = new_height
        time.sleep(2)
        # Re-measure: the page may have grown while we were scrolling.
        new_height = driver.execute_script(js)

    # On Windows the page source must be encoded as UTF-8 explicitly when
    # saving, so the file is written in binary mode.
    with open(r'b.html', 'wb') as f:
        f.write(driver.page_source.encode('utf-8'))
finally:
    # quit() ends the whole WebDriver session (close() only closes the current
    # window) and runs even when one of the steps above raises.
    driver.quit()
参考:https://blog.csdn.net/weixin_44673043/article/details/104971675
二. 微博模拟登录
1. 爬取量不大时,可以直接使用微博开放平台提供的接口(百度搜索“微博开放平台”)
2. 微博模拟登录,并通过下拉滚动页面应对 Ajax 加载
# Log in to Weibo through a real Chrome window and scroll the page a few times
# so that content loaded via Ajax on scroll gets rendered.
import os
import time

from selenium import webdriver
from selenium.webdriver.common.by import By

# Credentials are read from the environment rather than being hard-coded in
# the source, so the account is not leaked with the script.
username = os.environ.get("WEIBO_USERNAME")
password = os.environ.get("WEIBO_PASSWORD")
if not username or not password:
    raise SystemExit(
        "Set the WEIBO_USERNAME and WEIBO_PASSWORD environment variables "
        "before running this script."
    )

browser = webdriver.Chrome()
browser.get('https://www.weibo.com')
# Fixed pause before touching the login form, giving the page time to load.
time.sleep(10)

# find_element(By.CSS_SELECTOR, ...) replaces find_element_by_css_selector,
# which no longer exists in Selenium 4. The attribute selectors use plain
# ASCII quotes; typographic quotes are not valid CSS.
browser.find_element(By.CSS_SELECTOR, "#loginname").send_keys(username)
browser.find_element(
    By.CSS_SELECTOR, ".info_list.password input[node-type='password']"
).send_keys(password)
browser.find_element(
    By.CSS_SELECTOR, ".info_list.login_btn a[node-type='submitBtn']"
).click()

# Scroll to the bottom three times, pausing after each scroll so newly
# requested content has time to arrive.
for _ in range(3):
    browser.execute_script('window.scrollTo(0, document.body.scrollHeight)')
    time.sleep(3)

# NOTE: the browser is left open here, as in the original script; call
# browser.quit() once you are done with the page.