解析json文件,json接口,将内容入数据库(Python脚本)

刚接触用Python解析数据,发现十分的方便

 

先来了解一下json.loads()、json.dumps()和json.dump()、json.load()四个函数

json.loads()、json.dumps()和json.dump()、json.load()分别是两组不一样用法

带s的用于数据类型的转换,不带s的用于操做文件。

json.loads()、json.dumps()概念理解

json自己是字符串,经过json.loads()、json.dumps()能够进行字典和字符串的转换。

由于浏览器不支持字典方式显示,若是请求过来的类型是字典,必须经过json.dumps()函数将字典转换为字符串以后,才可展现。

json.dump()和json.load()概念理解

主要用来读写json文件函数

 

 

1.通过接口解析json数据提取各个城市的实时疫情数据,没有在脚本里进行定时,不知道该怎么写定时任务(查百度也弄不明白),就在linux系统设置了,一天执行一次脚本

# -*- coding: utf-8 -*-
import requests
from bs4 import BeautifulSoup
import json
import time
from pymysql import *

def mes():
    url = 'https://ncov.dxy.cn/ncovh5/view/pneumonia?from=timeline&isappinstalled=0'  #请求地址
    headers = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.87 Safari/537.36 SLBrowser/6.0.1.6181'}#创建头部信息
    resp =  requests.get(url,headers = headers)  #发送网络请求
    content=resp.content.decode('utf-8')
    print("content"+content)
    soup = BeautifulSoup(content, 'html.parser')
    print(soup)
    listA = soup.find_all(name='script',attrs={"id":"getAreaStat"})

    account =str(listA)

    mes = account.replace('[<script id="getAreaStat">try { window.getAreaStat = ', '')

    mes=mes.replace('}catch(e){}</script>]','')
    #mes=account[52:-21]
    #json.loads 用于解码 JSON 数据。该函数返回 Python 字段的数据类型。

    messages_json = json.loads(mes)
    print("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
    print(messages_json)
    times=time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
    print(times)
    provinceList=[]
    cityList=[]
    lenth=total()
    total()
    sheng = len(messages_json)
    con=sheng+(lenth)#算出数据库已有的条数+今天省份的条数,才是城市的开始id
    print(sheng)
    print(len(messages_json))
    print(con)
    for item in messages_json:
        lenth+=1
        provinceName=item['provinceName']
        nowCount = item['currentConfirmedCount']
        confirmedCount=item['confirmedCount']
        suspectedCount=item['suspectedCount']
        curedCount=item['curedCount']
        deadCount=item['deadCount']
        cities=item['cities']
        provinceList.append((lenth,times,provinceName,None,nowCount,confirmedCount,suspectedCount,curedCount,deadCount))
        for i in cities:
            con+=1
            provinceName = item['provinceName']
            cityName=i['cityName']
            nowCount = i['currentConfirmedCount']
            confirmedCount = i['confirmedCount']
            suspectedCount = item['suspectedCount']
            curedCount = i['curedCount']
            deadCount = i['deadCount']
            cityList.append((con,times,provinceName,cityName,nowCount,confirmedCount,suspectedCount,curedCount,deadCount))
    insert(provinceList,cityList)
    return NULL


def insert(provinceList, cityList):
    provinceTuple=tuple(provinceList)
    cityTuple=tuple(cityList)
    cursor = db.cursor()
    sql = "insert into China values (%s,%s,%s,%s,%s,%s,%s,%s,%s) "
    try:
        cursor.executemany(sql,provinceTuple)
        print("插入成功")
        db.commit()
    except Exception as e:
        print(e)
        db.rollback()
    try:
        cursor.executemany(sql,cityTuple)
        print("插入成功")
        db.commit()
    except Exception as e:
        print(e)
        db.rollback()
    cursor.close()
def total():
    sql= "select * from China"
    cursor = db.cursor()
    try:
        cursor.execute(sql)
        results = cursor.fetchall()
        lenth = len(results)
        db.commit()
        return lenth
    except:
        print('执行失败,进入回调1')
        db.rollback()

# 连接数据库的方法
def connectDB():
    try:
        db = connect(host='localhost', port=3306, user='root', password='123456', db='db2',charset='utf8')
        print("数据库连接成功")
        return db
    except Exception as e:
        print(e)
    return NULL
if __name__ == '__main__':
    db=connectDB()
    mes()

 

sql:

CREATE TABLE `china`  (
  `id` int(11) NOT NULL,
  `time` datetime NULL DEFAULT NULL,
  `provinceName` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '省,直辖市',
  `cityName` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '市',
  `nowCount` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '现存确诊',
  `confirmedCount` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '总共确诊',
  `suspectedCount` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '疑似',
  `curedCount` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '治愈',
  `deadCount` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL COMMENT '死亡',
  PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_bin ROW_FORMAT = Dynamic;

SET FOREIGN_KEY_CHECKS = 1;

 

 

 

2.是一个本地的json文件,解析,然后数据入库。和上面其实差不多

# coding:utf8

import requests
from bs4 import BeautifulSoup
import json
import time
from pymysql import *


def mes():
    with open(r"D:\alldata.json", 'r', encoding='UTF-8') as load_f:
        load_dict = json.load(load_f)
        print(load_dict)
        load_dict = load_dict['stations']  # 拆第一层花括号
        print(len(load_dict))
        con = len(load_dict)
        print(load_dict[0]['id'])
    # data_raw = pd.DataFrame(columns=load_dict.keys())
    # data_raw = data_raw.append(load_dict, ignore_index=True)

    provinceList = []
    lenth = select()
    print("aaaaaaaaa")
    print(select())
    for item in load_dict:
        print(lenth)
        lenth = lenth + 1
        print("bbbbbbbbbbbbbbbbbbbbbb")
        print(lenth)
        id = item['id']
        type = item['type']
        name = item['name']
        dev_id = item['dev_id']
        no = item['no']
        partner = item['partner']
        enabled = item['enabled']
        agent = item['agent']
        total = item['total']
        used = item['used']
        free = item['free']
        root = item['root']
       # agent_list = item['agent_list']
       # agent_list_public = ','.join(agent_list)
       # print(agent_list_public)
        parent = item['parent']
        owner = item['owner']
        longitude = item['longitude']
        latitude = item['latitude']
        address = item['address']
        region = item['region']
        vendor = item['vendor']
        country = item['country']
        province = item['province']
        district = item['district']
        zipcoce = item['zipcoce']
        style = item['style']
        rate = item['rate']
        rate_group = item['rate_group']
        package = item['package']
        lot_rate_group = item['lot_rate_group']
        contact_phone = item['contact_phone']
        contact_name = item['contact_name']
        memo = item['memo']
        stat_total = item['stat']['total']
        stat_busy = item['stat']['busy']
        stat_idle = item['stat']['idle']
        lot_rate = item['lot_rate']
        tags = item['tags']
        price = item['price']
        #数组转字符串
        tags_public = ','.join(tags)
        print(tags_public)
        if not lot_rate:
            print("Empty")
        else:
            print("Not Empty")
        print("========================================================================================================")
        if lot_rate==None:
            print("没有内容")
            print(lot_rate)
            provinceList.append(
                (id, lenth, type, name, dev_id, no, partner, enabled, agent, total, used, free, root, None,
                 parent, owner, longitude, latitude, address, region, vendor, country, province, district,
                 zipcoce, style, rate, rate_group, package, lot_rate_group, contact_phone, contact_name,
                 memo, stat_total, stat_busy, stat_idle, None, None, None, None, None, None, None, None, None, None,
                 None, None, None, tags_public, price))
        else:
            print("有内容")
            print(lot_rate)
            lot_rate_id = lot_rate['id']
            lot_rate_name = lot_rate['name']
            lot_rate_agent = lot_rate['agent']
            lot_rate_unit = lot_rate['unit']
            lot_rate_free_time = lot_rate['free_time']
            lot_rate_cycle = lot_rate['cycle']
            lot_rate_max_perday = lot_rate['max_perday']
            lot_rate_max_permonth = lot_rate['max_permonth']
            lot_rate_creator = lot_rate['creator']
            lot_rate_created_time = lot_rate['created_time']
            lot_rate_update_time = lot_rate['update_time']
            lot_rate_memo = lot_rate['memo']
            #lot_rate如果有内容  该字段置为1
            provinceList.append(
            (id, lenth, type, name, dev_id, no, partner, enabled, agent, total, used, free, root, None,
             parent, owner, longitude, latitude, address, region, vendor, country, province, district,
             zipcoce, style, rate, rate_group, package, lot_rate_group, contact_phone, contact_name,
             memo, stat_total, stat_busy, stat_idle, 1, lot_rate_id, lot_rate_name, lot_rate_agent, lot_rate_unit, lot_rate_free_time,
             lot_rate_cycle, lot_rate_max_perday, lot_rate_max_permonth, lot_rate_creator,lot_rate_created_time, lot_rate_update_time,
             lot_rate_memo, tags_public, price))

        print("相数")
        print(provinceList)
    insert(provinceList)
    return NULL


def insert(provinceList):
    provinceTuple = tuple(provinceList)

    cursor = db.cursor()
    sql = "insert into park_db values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) "
    try:
        cursor.executemany(sql, provinceTuple)
        print("插入成功")
        db.commit()
    except Exception as e:
        print(e)
        db.rollback()

    cursor.close()


def select():
    sql = "select * from park_db"
    cursor = db.cursor()
    try:
        cursor.execute(sql)
        results = cursor.fetchall()
        lenth = len(results)
        db.commit()
        return lenth
    except:
        print('执行失败,进入回调1')
        db.rollback()


def connectDB():
    try:
        db = connect(host='localhost', port=3306, user='root', password='123456', db='db3', charset='utf8')
        print("数据库连接成功")
        return db
    except Exception as e:
        print(e)
    return NULL


if __name__ == '__main__':
    db = connectDB()
    mes()

 

 

 

 

 

 

 

 

 

 

我也忘了当时是参考了哪篇博客了,反正作者看着,联系本人删除即可

 

上一篇:【Serde】结构体转 JSON


下一篇:HDFS读写流程(史上最精炼详细)