sentinelsat 多线程下载,提升下载速度(不支持断点续传)

通过多线程分块下载来提升 sentinelsat 的下载速度。代价是放弃了断点续传(每次都会从头重新下载),但下载进度条仍然正常显示,下载速度大幅提升。实现方式是重写原有的 _download() 方法,其余功能保持不变。

调用方法:

# api=sentinelsat.SentinelAPI(
api = SubSentinelAPI(
    user="****",
    password="*******"
)
api.download(product_id)

 

# -*- coding: utf-8 -*-
"""                                                                           
    @File    : sub_sentinel_api.py
    @Time    : 2021/2/25 11:38
    @Author  : Zhufeng
    @Email   : zhuf_py@163.com
    @Software: PyCharm 
"""

from operator import itemgetter

import sentinelsat
from contextlib import closing
from sentinelsat.sentinel import _check_scihub_response

from concurrent.futures import ThreadPoolExecutor, as_completed, wait
import time
import random


class SubSentinelAPI(sentinelsat.SentinelAPI):
    """SentinelAPI variant that downloads a product with parallel HTTP range requests.

    Only ``_download()`` is overridden. Resuming a partial download is not
    supported: every call fetches the whole file again and overwrites ``path``.

    Optional instance attributes, used when set on the instance:
        setting_chunk: multiplier applied to the 1 MiB base chunk size.
        max_worker: number of download threads (default 12).
        max_retries: attempts per chunk before the download fails (default 10).
    """

    def _worker(self, url, session, headers, progress, i):
        """Fetch a single byte range, retrying a bounded number of times.

        Args:
            url: download URL of the product.
            session: session object used to issue the GET request.
            headers: request headers; carries the ``Range`` of this chunk.
            progress: progress bar, advanced by the number of bytes received.
            i: index of the chunk, returned unchanged to the caller.

        Returns:
            Tuple ``(i, data)`` where ``data`` is the raw body of the response.

        Raises:
            Exception: the error of the last attempt once all attempts failed
                (``IOError`` if the last response had an empty body).
        """
        max_retries = max(1, int(getattr(self, "max_retries", 10)))
        last_error = None

        for attempt in range(max_retries):
            if attempt:
                # Random back-off so failing workers do not retry in lockstep.
                time.sleep(random.uniform(0, 1))
            try:
                with closing(
                    session.get(
                        url,
                        stream=True,
                        auth=session.auth,
                        headers=headers,
                        timeout=self.timeout,
                    )
                ) as r:
                    _check_scihub_response(r, test_json=False)
                    data = r.raw.read()
            except Exception as e:
                # Any failure of this attempt is retried; the error is kept so
                # it can be reported if every attempt fails.
                last_error = e
            else:
                if data:
                    progress.update(len(data))
                    return i, data
                last_error = IOError(
                    "Empty response body for chunk {} ({})".format(i, headers.get("Range"))
                )

        raise last_error

    def _download(self, url, path, session, file_size):
        """Download ``url`` to ``path`` as concurrent byte-range requests.

        The file is split into fixed-size ranges that are fetched by a thread
        pool and written to ``path`` in order. An existing file at ``path`` is
        overwritten, because resuming is not supported.

        Args:
            url: download URL of the product.
            path: destination file path.
            session: session object used for the range requests.
            file_size: total size of the file in bytes.

        Returns:
            The number of bytes written to ``path``.

        Raises:
            ValueError: if the configured chunk size is not positive.
            Exception: whatever ``_worker`` raises when a chunk cannot be fetched.
        """
        self._file_size = file_size

        chunk = 2 ** 20
        max_worker = 12

        if hasattr(self, "setting_chunk"):
            chunk *= int(self.setting_chunk)
        if hasattr(self, "max_worker"):
            max_worker = self.max_worker

        if chunk <= 0:
            raise ValueError("setting_chunk must be a positive integer")

        downloaded_bytes = 0

        with ThreadPoolExecutor(max_worker) as executor, closing(
            self._tqdm(
                desc="Downloading",
                total=self._file_size,
                unit="B",
                unit_scale=True,
                initial=0,
            )
        ) as progress:
            all_tasks = []
            # Stepping by the chunk size also covers files smaller than one
            # chunk (a single range) and a trailing partial chunk.
            for i, start in enumerate(range(0, file_size, chunk)):
                # HTTP byte ranges are inclusive, so the last byte is size - 1.
                end = min(start + chunk, file_size) - 1
                headers = {"Range": "bytes={}-{}".format(start, end)}
                all_tasks.append(
                    executor.submit(self._worker, url, session, headers, progress, i)
                )

            try:
                # "wb": the whole file is fetched again, so appending to a
                # leftover partial file would corrupt it.
                with open(path, "wb") as f:
                    # Futures are consumed in submit order, which is also the
                    # byte order of the file; no sorting is required.
                    for idx, future in enumerate(all_tasks):
                        _, data = future.result()
                        f.write(data)
                        downloaded_bytes += len(data)
                        # Drop the reference so the chunk can be freed.
                        all_tasks[idx] = None
            except BaseException:
                # Do not keep downloading queued chunks after a failure.
                for future in all_tasks:
                    if future is not None:
                        future.cancel()
                raise

        return downloaded_bytes
上一篇:Spring boot 启动过程解析 logback


下一篇:类加载过程