爬取天气数据,利用Pyecharts作轮播图

 

爬取网站链接:https://lishi.tianqi.com/xiamen/202312.html

爬取了厦门市2023年全年的天气数据,包括最高温、最低温、天气、风向和风力等。

爬虫代码:

import requests
import pandas as pd
import csv
from pyecharts.charts import Bar,Timeline
import pyecharts.options as opts
import parsel

# Scrape the 2023 daily weather history for Xiamen from lishi.tianqi.com, one
# page per month, and store every day as one row of 天气数据.csv.
cookies = {
    'cityPy': 'xiamen',
    'cityPy_expire': '1721098187',
    'UserId': '17204933865319972',
    'Hm_lvt_ab6a683aa97a52202eab5b3a9042a8d2': '1720493387',
    'HMACCOUNT': '4A9167DA75AB7059',
    'Hm_lpvt_ab6a683aa97a52202eab5b3a9042a8d2': '1720493402',
    'Hm_lvt_7c50c7060f1f743bccf8c150a646e90a': '1720493523',
    'Hm_lvt_30606b57e40fddacb2c26d2b789efbcb': '1720493529',
    'Hm_lpvt_30606b57e40fddacb2c26d2b789efbcb': '1720493529',
    'Hm_lpvt_7c50c7060f1f743bccf8c150a646e90a': '1720493646',
}

# Browser-like request headers; the cookies are sent separately through the
# `cookies` dict above rather than as a raw 'cookie' header.
headers = {
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'accept-language': 'zh-CN,zh;q=0.9',
    'cache-control': 'no-cache',
    'pragma': 'no-cache',
    'priority': 'u=0, i',
    'referer': 'https://lishi.tianqi.com/xiamen/202302.html',
    'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="126", "Google Chrome";v="126"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'document',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'same-origin',
    'sec-fetch-user': '?1',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
}


def _split_pair(text):
    """Return the first two whitespace-separated tokens of *text*.

    Returns None when *text* is None or holds fewer than two tokens, so the
    caller can skip a malformed row instead of crashing on it.
    """
    parts = (text or '').split()
    if len(parts) < 2:
        return None
    return parts[0], parts[1]


# The `with` block guarantees the CSV is flushed and closed before any later
# code reads it back, and also when a request fails half-way through.
with open('天气数据.csv', mode='w', encoding='utf-8', newline='') as f:
    csv_writer = csv.DictWriter(f, fieldnames=['日期', '星期', '最高温', '最低温', '天气', '风向', '风力'])
    csv_writer.writeheader()
    for month in range(1, 13):
        # Page names use a zero-padded month, e.g. 202301.html.
        url = f'https://lishi.tianqi.com/xiamen/2023{month:02d}.html'
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(url=url, cookies=cookies, headers=headers, timeout=10)
        # Fail loudly on an HTTP error instead of silently parsing an error page.
        response.raise_for_status()
        response.encoding = response.apparent_encoding
        selector = parsel.Selector(response.text)
        for li in selector.css('div.tian_three ul.thrui li'):
            # The th200 cell text is split into the date and the weekday.
            day = _split_pair(li.css('div.th200::text').get())
            # The th140 cells are read in order as: max temp, min temp,
            # weather, and a wind cell split into direction and force.
            all_list = li.css('div.th140::text').getall()
            wind = _split_pair(all_list[3]) if len(all_list) >= 4 else None
            if day is None or wind is None:
                print(f'skipping malformed row on {url}')
                continue
            date_time, xingqi_time = day
            max_temp, min_temp, weather = all_list[:3]
            wind_orient, wind_rank = wind
            dit = {
                '日期': date_time,
                '星期': xingqi_time,
                '最高温': max_temp,
                '最低温': min_temp,
                '天气': weather,
                '风向': wind_orient,
                '风力': wind_rank,
            }
            csv_writer.writerow(dit)
            print(date_time, xingqi_time, max_temp, min_temp, weather, wind_orient, wind_rank)

保存到csv文件:

 

接下来读取文件,对数据格式做调整,以便进行绘图操作。

# Load the scraped CSV and count, for every month, how many days had each
# weather type. The result `new_data` has the columns month / weather / count.
df = pd.read_csv('天气数据.csv')
df.info()  # info() prints its report itself and returns None, so it is not wrapped in print()
df['日期'] = pd.to_datetime(df['日期'])
df['month_'] = df['日期'].dt.to_period('M')  # monthly period taken from each date
new_data = df.groupby(['month_', '天气']).size().reset_index()
new_data.columns = ['month', 'weather', 'count']  # rename the columns
print(df.head())  # call head(); the bare attribute would print the bound method instead of the first rows
print(new_data)
# Filter the rows for January and keep only the weather and count columns;
# sort_values(by='count', ascending=False) orders them by count, largest
# first, and .values.tolist() turns the frame into a list of [weather, count] pairs.
print(new_data[new_data['month'] == '2023-01'][['weather', 'count']].sort_values(by='count', ascending=False).values.tolist())
# Example output:
# [['多云', 14], ['小雨', 5], ['晴', 5], ['雾', 4], ['阴', 2], ['中雨', 1]]

new_data数据格式如下: 

 

现在进行绘图操作:

# Build a timeline carousel holding one horizontal bar chart per month and
# write it to an HTML file.
timeline = Timeline()
timeline.add_schema(play_interval=1000)  # play interval in milliseconds

for month in new_data['month'].unique():
    # Weather/count rows of this month, ordered by count from largest to smallest.
    in_month = new_data['month'] == month
    month_counts = new_data.loc[in_month, ['weather', 'count']]
    rows = month_counts.sort_values(by='count', ascending=False).values.tolist()
    print(rows)

    weather_names = [name for name, _ in rows]
    day_counts = [count for _, count in rows]

    bar = Bar()
    bar.add_xaxis(weather_names)
    bar.add_yaxis('', day_counts)
    bar.reversal_axis()  # swap the axes so the bars run horizontally
    bar.set_global_opts(title_opts=opts.TitleOpts(title='厦门市2023年每月天气变化'))
    bar.set_series_opts(label_opts=opts.LabelOpts(position='right'))

    # The month's string form labels this chart's point on the timeline.
    timeline.add(bar, f'{month}')

timeline.render('天气轮播图.html')

 

点击轮播图下方的按钮就可以进行播放了。 

 

上一篇:【排序 - 冒泡排序】


下一篇:【C++编程】Hello World 代码示例