pandas读取excel 地址列调用api,取出想要的数据重新生成拼装,生成新的数据并写入excel,多线程跑,需要保证不串行
import json
import threading
import time
from concurrent.futures import ThreadPoolExecutor

import pandas as pd
import requests


class RunData(object):
    """Fill the rows of an Excel sheet with POI data fetched over HTTP.

    The sheet is loaded with the "序号" column as its index. Each value of
    the "原地址" column is sent as the ``keywords`` parameter of a GET
    request, and selected keys of the first returned POI are written back
    into the same row.
    """

    # Keys copied from the first POI of a response into same-named columns.
    POI_FIELDS = ("id", "name", "type", "address", "location")

    def __init__(self, filepath, timeout=10):
        """Load the workbook and add one empty column per POI field.

        Args:
            filepath: Path of the Excel file to read.
            timeout: Seconds to wait for each HTTP request before giving up.
        """
        self.df = pd.read_excel(filepath, index_col="序号")
        self.url = r"https://xxxxxxxxxxxxxxx"
        self.timeout = timeout
        for column in self.POI_FIELDS:
            self.df[column] = ""
        # URLs of the requests that received a response; its length is the
        # progress counter printed by send_poi.
        self.url_list = []
        # send_poi runs on several threads at once; this lock serialises
        # every update of self.df and self.url_list.
        self._lock = threading.Lock()

    def readExcel(self):
        """Return a list of ``(index label, 原地址 value)`` pairs, one per row."""
        column = self.df["原地址"]
        return list(zip(column.index.tolist(), column.tolist()))

    def send_poi(self, addr):
        """Query the API for one address and store the first POI in its row.

        Args:
            addr: An ``(index label, address text)`` pair as produced by
                ``readExcel``.

        A request that fails (network error, timeout) or whose body is not
        valid JSON is reported on stdout and leaves the row untouched.
        """
        row_index, keywords = addr
        params = {
            "key": "xxxxxxxxx",
            "keywords": keywords,
        }
        try:
            # Without a timeout a stalled connection would block this worker
            # thread indefinitely.
            res = requests.get(url=self.url, params=params, timeout=self.timeout)
            print(res.url)
            payload = json.loads(res.text)
        except (requests.RequestException, ValueError) as exc:
            # json.JSONDecodeError is a ValueError subclass.
            print(f"序号 {row_index} 请求失败: {exc!r}")
            return

        pois = payload.get("pois") if payload.get("info") == "OK" else None

        with self._lock:
            self.url_list.append(res.url)
            if pois:
                first = pois[0]
                for field in self.POI_FIELDS:
                    # Write through the frame itself: the chained form
                    # df[col].at[idx] = v may assign into a temporary Series
                    # and never reach the DataFrame.
                    self.df.at[row_index, field] = first.get(field)
            done = len(self.url_list)
        print(f"==========================已完成{done}==========================")

    def writeExcel(self, path="a.xlsx"):
        """Write the DataFrame to ``path`` (default ``a.xlsx``)."""
        self.df.to_excel(path)

    def __str__(self):
        """Return the DataFrame rendered by ``DataFrame.to_string``."""
        return f"{self.df.to_string()}"


if __name__ == '__main__':
    start = time.time()
    data = RunData(filepath=r"C:\Users\fengzi\Desktop\test.xlsx")
    addrs = data.readExcel()
    with ThreadPoolExecutor(max_workers=20, thread_name_prefix="test-") as pool:
        futures = [pool.submit(data.send_poi, addr) for addr in addrs]
    # Leaving the with-block waits for every task. Inspect each future so
    # that an unexpected worker exception is reported instead of being
    # silently dropped, while the rows that did succeed are still saved.
    for (row_index, _), future in zip(addrs, futures):
        error = future.exception()
        if error is not None:
            print(f"序号 {row_index} 处理失败: {error!r}")
    data.writeExcel()
    end = time.time()
    print("花费时间为:%ss" % (end - start))