爬取秦皇岛天气情况

一、本次使用 bs4(BeautifulSoup)定位页面中的天气列表,再用正则表达式提取日期、温度、天气和风向

二、源代码如下:

import requests
import re
import time
import xlwt
from bs4 import BeautifulSoup
import pandas as pd
from fake_useragent import UserAgent
# Crawl configuration. Years are inclusive on both ends.
START_YEAR = 2011
END_YEAR = 2021
BASE_URL = 'https://lishi.tianqi.com/qinhuangdao/'
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
}
REQUEST_TIMEOUT = 10   # seconds; without it a stalled connection blocks forever
REQUEST_DELAY = 0.05   # pause between requests, same value the script always used
CSV_PATH = './秦皇岛天气情况.csv'
FIELDS_PER_DAY = 4     # max temp, min temp, weather, wind -- in that order

# Compiled once instead of on every loop iteration.
DATE_PATTERN = re.compile('class="th200">(.*?)</')
CELL_PATTERN = re.compile('class="th140">(.*?)</')

# Column accumulators; one entry per scraped day, always kept the same length.
date_box = []
max_temp = []
min_temp = []
weh = []
wind = []


def month_url(year, month):
    """Return the history-page URL for the given year and month (1-12)."""
    return f'{BASE_URL}{year}{month:02d}.html'


def extract_rows(markup):
    """Parse one month's markup into per-day tuples.

    Args:
        markup: HTML text containing ``th200`` (date) and ``th140``
            (max temp, min temp, weather, wind) cells.

    Returns:
        A list of ``(date, max_temp, min_temp, weather, wind)`` tuples. The
        date is cut to its first 10 characters, as the original script did.

    Raises:
        ValueError: if the number of date cells differs from the number of
            complete four-cell groups. Pairing them by position would then
            attach values to the wrong day.
    """
    dates = DATE_PATTERN.findall(markup)
    cells = CELL_PATTERN.findall(markup)
    # Trailing cells that do not form a full group are ignored.
    day_count = len(cells) // FIELDS_PER_DAY
    if len(dates) != day_count:
        raise ValueError(
            f'{len(dates)} date cells but {day_count} complete data groups'
        )
    return [
        (day[:10], *cells[i * FIELDS_PER_DAY:(i + 1) * FIELDS_PER_DAY])
        for i, day in enumerate(dates)
    ]


def fetch_month_markup(year, month):
    """Download one month's page and return the ``thrui`` elements as text.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-2xx HTTP status.
    """
    response = requests.get(
        month_url(year, month), headers=HEADERS, timeout=REQUEST_TIMEOUT
    )
    # Fail loudly on 4xx/5xx rather than parsing an error page as data.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    return str(soup.find_all(class_='thrui'))


def build_frame():
    """Assemble the accumulated columns into a DataFrame."""
    return pd.DataFrame({
        '日期': date_box,
        '最高温度': max_temp,
        '最低温度': min_temp,
        '天气': weh,
        '风向': wind,
    })


def main():
    """Crawl every month in the configured range and write the CSV once.

    A month that cannot be fetched or parsed is reported and skipped, so one
    bad page no longer aborts the run. The CSV is written in a ``finally``
    block, so rows collected before an interruption are still saved.

    Returns:
        The DataFrame that was written to ``CSV_PATH``.
    """
    try:
        for year in range(START_YEAR, END_YEAR + 1):
            for month in range(1, 13):
                print(f'爬取{year}年{month}月的天气数据')
                try:
                    rows = extract_rows(fetch_month_markup(year, month))
                except (requests.RequestException, ValueError) as err:
                    print(f'跳过{year}年{month}月:{err}')
                    rows = []
                time.sleep(REQUEST_DELAY)
                for day, high, low, weather, wind_text in rows:
                    date_box.append(day)
                    max_temp.append(high)
                    min_temp.append(low)
                    weh.append(weather)
                    wind.append(wind_text)
    finally:
        # Written once, not once per month as before.
        datas = build_frame()
        datas.to_csv(CSV_PATH, index=False, sep=',', encoding='utf-8_sig')
    print(datas)
    return datas


if __name__ == '__main__':
    main()

上一篇:【博客认证卡片】


下一篇:个人学习进度