用python+selenium抓取微博24小时热门话题的前15个并保存到txt中

抓取微博24小时热门话题的前15个,抓取的内容请保存至txt文件中,需要抓取排行、话题和阅读数。

 #coding=utf-8
 from selenium import webdriver
 import unittest
 from time import sleep

 class Weibo(unittest.TestCase):
     """Scrape the Weibo 24-hour hot-topic ranking and save it to a text file.

     ``setUp`` starts Chrome, logs in, collects up to ``TOPIC_LIMIT`` rows of
     (rank, topic, read count) and writes them to
     ``微博24小时热门话题.txt`` in the current working directory.
     ``test_weibo_topic`` then checks that something was scraped, and
     ``tearDown`` closes the browser.
     """

     # Locator strategies spelled as the raw WebDriver strings (the values of
     # selenium's By.CSS_SELECTOR and By.NAME).  The generic
     # find_element(s)(by, value) API is used instead of the
     # find_element(s)_by_* helpers, which newer Selenium releases removed.
     _BY_CSS = 'css selector'
     _BY_NAME = 'name'

     LOGIN_URL = 'http://weibo.com/'
     TOPIC_URL = 'http://d.weibo.com/100803?cfs=&Pl_Discover_Pt6Rank__5_filter=hothtlist_type%3D1#_0'
     # Weibo account credentials (placeholders; override before running).
     USERNAME = '649_xxxx@qq.com'
     PASSWORD = 'kemi_xxxx'
     # Maximum number of ranking rows to collect.
     TOPIC_LIMIT = 15
     # Seconds to wait after each navigation before touching the page.
     PAGE_LOAD_WAIT = 5

     def setUp(self):
         """Start the browser, scrape the ranking and write the output file.

         Sets ``self.dr`` (the driver), ``self.hot_list`` (scraped rows) and
         ``self.weibo_topic`` (name of the written file).
         """
         self.dr = webdriver.Chrome()
         try:
             self.hot_list = self.get_weibo_hot_topic()
             self.weibo_topic = self.get_top_rank_file()
         except Exception:
             # unittest does not call tearDown() when setUp() raises, so the
             # browser has to be closed here to avoid an orphaned process.
             self.dr.quit()
             raise

     def get_weibo_hot_topic(self):
         """Log in and return the hot-topic rows currently shown on the page.

         Returns:
             A list of ``[rank, topic, read_count]`` string triples, at most
             ``TOPIC_LIMIT`` long.  If the page exposes fewer rows, only
             those are returned instead of raising ``IndexError``.
         """
         self.dr.get(self.LOGIN_URL)
         sleep(self.PAGE_LOAD_WAIT)
         self.login(self.USERNAME, self.PASSWORD)
         self.dr.get(self.TOPIC_URL)
         sleep(self.PAGE_LOAD_WAIT)

         # Query each column once; the element lists do not change between
         # rows, so re-running the selectors per row is wasted round-trips.
         ranks = self.dr.find_elements(self._BY_CSS, 'div.title.W_autocut>span')
         topics = self.dr.find_elements(self._BY_CSS, 'div.title.W_autocut>a.S_txt1')
         numbers = self.dr.find_elements(self._BY_CSS, '.number')

         # zip() stops at the shortest column, so a short page cannot index
         # past the end of any list.
         rows = list(zip(ranks, topics, numbers))[:self.TOPIC_LIMIT]
         return [[rank.text, topic.text, number.text]
                 for rank, topic, number in rows]

     def get_top_rank_file(self):
         """Write ``self.hot_list`` to the UTF-8 output file.

         Each row is preceded by a separator line.  Sets ``self.file_title``
         and ``self.file`` (closed on return).

         Returns:
             The name of the file that was written.
         """
         self.file_title = '微博24小时热门话题'
         file_name = self.file_title + '.txt'
         separate_line = '~~~~~~~~~~~~~~~~~~~~~~~~\n'  # row separator

         chunks = []
         for item in self.hot_list:
             chunks.append(separate_line)
             chunks.append(item[0] + '  ' + item[1] + '  ' + '阅读数:' + item[2] + '\n')

         # Binary mode keeps the '\n' line endings byte-for-byte on every
         # platform; the context manager closes the file even if the write
         # fails.
         with open(file_name, 'wb') as out:
             self.file = out
             out.write(''.join(chunks).encode('utf-8'))
         return file_name

     def login(self, username, password):
         """Fill in the login form on the current page and submit it.

         Args:
             username: Account name typed into the ``username`` field.
             password: Password typed into the ``password`` field.
         """
         user_box = self.dr.find_element(self._BY_NAME, 'username')
         user_box.clear()
         user_box.send_keys(username)
         self.dr.find_element(self._BY_NAME, 'password').send_keys(password)
         self.dr.find_element(self._BY_CSS, '.info_list.login_btn').click()

     def test_weibo_topic(self):
         """Check that ``setUp`` scraped at least one row, then report completion."""
         self.assertTrue(self.hot_list, 'no hot topics were scraped')
         print('抓取完毕')

     def tearDown(self):
         """Close the browser started in ``setUp``."""
         self.dr.quit()

 # Script entry point: hand control to the unittest runner, which discovers
 # and runs the Weibo test case defined above.
 if __name__ == '__main__':
     unittest.main()

网页如下:

用python+selenium抓取微博24小时热门话题的前15个并保存到txt中

生成txt文件如下:

用python+selenium抓取微博24小时热门话题的前15个并保存到txt中

上一篇:源码编译安装lnmp环境(nginx-1.14.2 + mysql-5.6.43 + php-5.6.30 )------踩了无数坑,重装了十几次服务器才会的,不容易啊!


下一篇:word标题编号与上一级不一致的解决方法