循环调用接口时出现报错:('Connection aborted.', ConnectionAbortedError(10053, '你的主机中的软件中止了一个已建立的连接。'))

# -*- coding: utf-8 -*-
# @Time : 2022/1/11 13:36
import os

import requests
from pathlib import Path
import time


def mkdir(path):
    """
    Create the directory ``path`` (including missing parents) if it does not exist.

    Calling this on a directory that already exists is a no-op.

    :param path: directory to create (str or path-like)
    :return: None
    :raises FileExistsError: if ``path`` exists but is not a directory
    """
    # exist_ok=True avoids the check-then-create race of testing
    # os.path.exists() first and then calling makedirs().
    os.makedirs(path, exist_ok=True)


def get_excel(dir_paths):
    """
    Upload every PDF in a directory to the table-extraction service and save the replies.

    Each file directly inside ``dir_paths`` whose suffix is ``.pdf``
    (case-insensitive) is POSTed as a multipart upload. The response body is
    written to ``<dir_paths>/excel_result/<pdf name with .xlsx suffix>``.
    Progress and per-file timing are printed to stdout. If the directory
    holds no PDF files nothing is sent and no output folder is created.

    :param dir_paths: directory containing the PDF files (str or path-like)
    :return: None
    :raises requests.HTTPError: if the service answers with a 4xx/5xx status
    :raises requests.RequestException: on connection failure or when the
        600-second per-file timeout expires
    """
    # NOTE(review): the author's notes below the script say a missing '/' in
    # this URL caused the "Connection aborted" errors -- presumably the
    # trailing slash; keep it unless the service contract says otherwise.
    url = "http://192.168.1.31:56782/table_extract/"

    dir_path = Path(dir_paths)
    # glob() is case-sensitive on some platforms, so match the suffix manually.
    file_paths = [
        candidate for candidate in dir_path.glob('*.*')
        if candidate.suffix.lower() == '.pdf'
    ]
    if not file_paths:
        return

    # Loop-invariant setup: output folder, request headers, file count.
    xlsx_file_dir = dir_path.joinpath('excel_result')
    mkdir(xlsx_file_dir)
    headers = {'Connection': 'close'}
    total = len(file_paths)

    for file_number, file_path in enumerate(file_paths, start=1):
        print("第{}份文件开始==========================================".format(file_number))
        print('     [{0} / {1}] 服务开始 {2}...'.format(file_number, total, file_path))
        start_time = time.time()

        xlsx_file = xlsx_file_dir.joinpath(file_path.with_suffix('.xlsx').name)

        # Context managers guarantee that both the PDF handle and the session
        # actually used for the request are closed, even if the POST raises.
        with open(str(file_path), 'rb') as pdf_file, requests.Session() as session:
            files = [('file', (file_path.name, pdf_file, 'application/pdf'))]
            response = session.post(url, headers=headers, data={}, files=files, timeout=600)

        # Fail loudly instead of saving an HTTP error page as an .xlsx file.
        response.raise_for_status()

        with open(str(xlsx_file), 'wb') as f:
            f.write(response.content)

        duration_sec = time.time() - start_time

        print('     [{0} / {1}] 服务结束 {2} with duration: {3} minutes.'.format(file_number, total, file_path, int(duration_sec / 60)))
        print("     第{}份文件结束!!!用时{}min".format(file_number, int(duration_sec / 60)))


if __name__ == '__main__':
    # Script entry point: convert every PDF in the hard-coded folder and
    # report the total wall-clock time in minutes.
    began_at = time.time()
    get_excel(r'F:\财富趋势')
    # Elapsed time is truncated to whole seconds before converting to minutes.
    elapsed_whole_seconds = int(time.time() - began_at)
    total_minutes = round(elapsed_whole_seconds / 60, 2)
    print("总计用时:{}min".format(total_minutes))

最终原因:url 没写对,少了一个 /。补上之后报错消失,排查了很久,在此记录一下。

上一篇:ElementUI 表数据导出


下一篇:放球