1. 打开网站 http://top.baidu.com/buzz?b=1&fr=topindex
2. 在页面上单击右键，选择“查看网页源代码”
3. 使用 requests 和 BeautifulSoup 爬取数据
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Scrape a hot-search ranking page and print the top entries as a table.
# The collected rows are also kept in ``lst`` as [rank, title, heat].

# NOTE(review): the printed title below says "Baidu hot search", but the page
# actually requested is tophub.today — confirm which source is intended.
url = "https://tophub.today/"

# Request headers must be a dict of name -> value (the previous version built
# a one-element set and never sent it with the request).
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
}

# A timeout keeps the script from hanging forever on a stalled connection;
# raise_for_status() stops early instead of parsing an HTTP error page.
r = requests.get(url, headers=headers, timeout=10)
r.raise_for_status()
r.encoding = r.apparent_encoding
x = r.text

soup = BeautifulSoup(x, 'lxml')
lst1 = soup.find_all('div', class_='zb-kc-i')  # used as the title column
lst2 = soup.find_all('div', class_='f')        # used as the heat column

# Rows collected for later use: [rank, title, heat].
lst = []

print('{:^55}'.format('百度热搜榜'))
print('{:^5}\t{:^40}\t{:^10}'.format('排名', '标题', '热度'))

# zip() over the first 10 of each list stops at the shorter one, so a page
# with fewer than 10 matches no longer raises IndexError.
for rank, (title_tag, heat_tag) in enumerate(zip(lst1[:10], lst2[:10]), start=1):
    # Tag.string is None when a tag has more than one child; fall back to the
    # concatenated text so the format call does not fail on None.
    title = title_tag.string or title_tag.get_text()
    heat = heat_tag.string or heat_tag.get_text()
    print('{:^5}\t{:^30}\t{:^10}'.format(rank, title, heat))
    lst.append([rank, title, heat])
print()