pandas笔记

使用 pandas 读取 Excel,对"原地址"列逐行调用 API,取出需要的字段并拼装成新的列,再把结果写入新的 Excel 文件。请求使用多线程并发执行,因此必须保证数据不串行,即每条返回结果都写回到它对应的那一行。

import json
import threading
import time
from concurrent.futures import ThreadPoolExecutor

import pandas as pd
import requests


class RunData(object):
    """Enrich the address rows of an Excel sheet with POI data fetched over HTTP.

    The workbook is loaded into ``self.df`` (indexed by the "序号" column) and
    five empty result columns are added.  ``send_poi`` is designed to be
    called once per row, possibly from several threads at the same time; each
    result is written back by index label so it lands on the row it was
    requested for.
    """

    # Result columns; the same names are the keys copied from a POI entry.
    _POI_FIELDS = ("id", "name", "type", "address", "location")

    def __init__(self, filepath, timeout=10):
        """Load the workbook and prepare the empty result columns.

        Args:
            filepath: Path of the Excel file.  It must contain a "序号" column
                (used as the index) and an "原地址" column.
            timeout: Seconds to wait for each HTTP request before giving up.
        """
        self.df = pd.read_excel(filepath, index_col="序号")

        self.url = r"https://xxxxxxxxxxxxxxx"
        self.timeout = timeout
        for column in self._POI_FIELDS:
            self.df[column] = ""
        self.url_list = []
        # send_poi runs on worker threads and pandas does not promise that
        # concurrent mutation of one DataFrame is safe, so writes are serialized.
        self._lock = threading.Lock()

    def readExcel(self, ):
        """Return ``(index_label, address)`` pairs for every row of "原地址"."""
        column = self.df["原地址"]
        return list(zip(column.index.tolist(), column.tolist()))

    def send_poi(self, addr):
        """Query the API for one address and store the first POI on its row.

        Args:
            addr: An ``(index_label, address)`` pair as produced by
                ``readExcel``.

        Raises:
            requests.RequestException: If the request fails or times out.
            ValueError: If the response body is not valid JSON.
        """
        params = {
            "key": "xxxxxxxxx",
            "keywords": addr[1],
        }
        # Without a timeout a single stalled connection would block its worker
        # thread (and therefore the whole run) indefinitely.
        res = requests.get(url=self.url, params=params, timeout=self.timeout)
        print(res.url)
        self.url_list.append(res.url)
        payload = json.loads(res.text)
        if payload.get("info") == "OK":
            pois = payload.get("pois")
            if pois:
                # Only the first (best-ranked) hit is kept.
                self._store_poi(addr[0], pois[0])
        print(f"==========================已完成{len(self.url_list)}==========================")

    def _store_poi(self, row_label, poi):
        """Copy the result fields of *poi* into the row labelled *row_label*."""
        with self._lock:
            for field in self._POI_FIELDS:
                # Address the cell through the frame itself.  Writing via
                # df[col].at[...] is chained assignment: it targets a temporary
                # Series and leaves the frame unchanged under Copy-on-Write.
                self.df.at[row_label, field] = poi.get(field)

    def writeExcel(self, path="a.xlsx"):
        """Write the enriched table to *path* (defaults to ``a.xlsx``)."""
        self.df.to_excel(path)

    def __str__(self):
        """Return the full table rendered as text."""
        return self.df.to_string()


if __name__ == '__main__':
    # Script entry point: look up every address concurrently, then save.
    start = time.time()
    data = RunData(filepath=r"C:\Users\fengzi\Desktop\test.xlsx")
    addrs = data.readExcel()
    with ThreadPoolExecutor(max_workers=20, thread_name_prefix="test-") as pool:
        # Keep each future next to its row so a failure can be attributed.
        futures = [(addr, pool.submit(data.send_poi, addr)) for addr in addrs]
        for addr, future in futures:
            try:
                # result() re-raises whatever the worker raised; an unconsumed
                # pool.map() would have discarded those errors silently.
                future.result()
            except Exception as exc:
                # Top-level boundary: report the row and keep going so one bad
                # lookup does not throw away the results of all the others.
                print(f"lookup failed for row {addr[0]} ({addr[1]!r}): {exc!r}")
    data.writeExcel()
    end = time.time()
    print("花费时间为:%ss" % (end - start))

 

上一篇:[转] Oracle sql语句执行顺序


下一篇:C# 流不可读 解决办法