使用Python爬取中国银行保险监督管理委员会的保险许可证

import requests
import json
import time
import random
import math
from retry import retry

# Cities to crawl, processed in this order by the driver loop at the bottom of the file.
address_list = ['临沂市', '南宁市', '保定市', '廊坊市', '阜阳市', '蚌埠市', '滁州市', '马鞍山市', '芜湖市', '安庆市', '六安市', '莆田市', '漳州市', '宁德市', '清远市', '潮州市', '汕头市', '揭阳市', '江门市', '肇庆市', '湛江市', '遵义市', '邯郸市', '唐山市', '秦皇岛市', '邢台市', '沧州市', '大庆市', '洛阳市', '新乡市', '南阳市', '商丘市', '周口市', '驻马店市', '信阳市', '荆州市', '宜昌市', '衡阳市', '岳阳市', '株洲市', '九江市', '赣州市', '上饶市', '宜春市', '连云港市', '宿迁市', '淮安市', '盐城市', '扬州市', '泰州市', '镇江市', '淄博市', '威海市', '济宁市', '菏泽市', '咸阳市', '绵阳市', '湖州市', '舟山市', '海口市', '三亚市', '桂林市', '柳州市', '呼和浩特市', '银川市', '乌鲁木齐市', '襄阳市', '三明市', '茂名市', '德州市']

# Licence query endpoint; every request is a POST to this URL.
url = r"http://xkz.cbirc.gov.cn/bx/getLicence.do?useState=3"

# Form body for the POST: "start" is the record offset, "limit" the page size.
data = {
    "start": "10",
    "limit": "10",
}

# NOTE(review): the Cookie carries a hard-coded JSESSIONID, presumably copied
# from a browser session -- it may need refreshing if requests start failing.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36",
    "Origin": r"http://xkz.cbirc.gov.cn",
    "Referer": r"http://xkz.cbirc.gov.cn/bx/",
    "Cookie": r"isClick=true; JSESSIONID=0000yAISFDk5epshBvgldsOaQRQ:-1",
    "Connection": "close",
}

# Query-string parameters; "address" is overwritten with the city being crawled.
params = {
    "address": "XXX",
}

# Proxy mapping handed to every requests.post call. The fetch functions
# reference this name, so it must exist; None means "connect directly".
# To go through a proxy use a requests-style mapping, e.g.
# {"http": "http://host:port"}.
proxies = None


@retry(tries=7)
def get_baoxian(address):
    """Collect every licence record listed for one city.

    Sends one request to read the reported "total" record count, derives the
    number of pages from it, then fetches the pages one by one through
    ``get_single_baoxian``.

    Args:
        address: City name used as the ``address`` query parameter.

    Returns:
        A list of formatted record strings covering all pages, in page order.
        Empty when the reported total is 0.

    Raises:
        Any exception raised by the request, by JSON decoding, or by a page
        fetch. Because of ``@retry(tries=7)`` such a failure restarts the
        whole city from the first page; the exception presumably propagates
        once the attempts are used up (see the ``retry`` package).
    """
    # Written to the shared dict (not a copy) so that any code reading the
    # module-level ``params`` sees the city currently being crawled.
    params["address"] = address

    # Page size comes from the request form itself, so the page count stays
    # correct if "limit" is ever changed.
    page_size = int(data["limit"])

    probe = requests.post(url=url, data=data, headers=headers, params=params,
                          proxies=proxies, timeout=1)
    probe.encoding = 'utf-8'
    summary = json.loads(probe.text)
    count = math.ceil(summary["total"] / page_size)

    all_data = []
    for i in range(count):
        # The page index printed here is zero-based.
        print(address, "已经开始第", i, "页")
        all_data.extend(get_single_baoxian(count, address, i))
    return all_data

@retry(tries=7)
def get_single_baoxian(count, address, i):
    """Fetch one result page for a city and format its records as text lines.

    The request form and query string are built per call from the
    module-level ``data`` and ``params`` templates, so the shared dicts are
    not modified and the query always uses the ``address`` argument.

    Args:
        count: Total page count for the city. Not used here; kept so existing
            callers keep working.
        address: City name sent as the ``address`` query parameter and
            appended to every formatted line.
        i: Zero-based page index; the record offset is ``i * page size``.

    Returns:
        A list with one formatted string per entry of the response's
        "datas" array.

    Raises:
        Any exception raised by the request or by JSON decoding, and
        ``KeyError`` when an expected field is missing. ``@retry(tries=7)``
        re-invokes the function on failure (see the ``retry`` package).
    """
    page_size = int(data["limit"])
    form = dict(data, start=str(i * page_size))
    query = dict(params, address=address)

    r2 = requests.post(url=url, data=form, headers=headers, params=query,
                       proxies=proxies, timeout=1)
    r2.encoding = 'utf-8'
    payload = json.loads(r2.text)

    return [
        "机构编码:{},证件流水号:{},机构名称:{},批准成立日期:{},发证日期:{},城市:{}".format(
            record["certCode"], record["flowNo"], record["fullName"],
            record["setDate"], record["printDate"], address)
        for record in payload["datas"]
    ]

def save(data):
    """Append each record in ``data`` as one line of the output text file.

    Args:
        data: Iterable of strings; each one is written followed by a newline.

    The file is opened once per call, in append mode, and always as UTF-8 so
    the Chinese text is stored the same way on every platform. Nothing is
    opened or created when ``data`` is empty.

    Raises:
        TypeError: If an item of ``data`` is not a string.
        OSError: If the output file cannot be opened or written.
    """
    lines = [record + "\n" for record in data]
    if not lines:
        return
    with open("中国银行保险监督管理委员会1.txt", "a", encoding="utf-8") as f:
        f.writelines(lines)


# Crawl the cities one at a time; a city's records are appended to the output
# file before the next city is requested.
for city in address_list:
    save(get_baoxian(city))
    print(city, "已完成")
上一篇:11月16号


下一篇:web自动化进阶