This speeds up sentinelsat downloads by replacing the original _download() method with a multi-threaded implementation based on HTTP Range requests. Resuming interrupted downloads is no longer supported, but the progress bar still works and download speed improves substantially; all other functionality is unchanged. Note that all chunks are buffered in memory and written to disk only after every range request has finished.
Usage:
# api = sentinelsat.SentinelAPI(
api = SubSentinelAPI(
    user="****",
    password="*******"
)
api.download(product_id)  # product_id: the product UUID, same signature as SentinelAPI.download()
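Two optional attributes, checked via hasattr() inside _download(), tune the parallelism: max_worker (the thread count, default 12) and setting_chunk (an integer multiplier on the 1 MB base chunk size). A minimal configuration sketch; product_id is a placeholder and the values are illustrative, not tuned recommendations:

api = SubSentinelAPI(
    user="****",
    password="*******"
)
api.max_worker = 16    # 16 concurrent range requests (default: 12)
api.setting_chunk = 4  # 4 * 1 MB = 4 MB per range request (default: 1 MB)
api.download(product_id)

The full implementation: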
# -*- coding: utf-8 -*-
"""
@File : sub_sentinel_api.py
@Time : 2021/2/25 11:38
@Author : Zhufeng
@Email : zhuf_py@163.com
@Software: PyCharm
"""
from operator import itemgetter
import sentinelsat
from contextlib import closing
from sentinelsat.sentinel import _check_scihub_response
from concurrent.futures import ThreadPoolExecutor, as_completed
import time
import random
class SubSentinelAPI(sentinelsat.SentinelAPI):

    def _worker(self, url, session, headers, progress, i):
        """Download a single byte range; retry until it succeeds."""
        while True:
            try:
                with closing(
                    session.get(url, stream=True, auth=session.auth, headers=headers, timeout=self.timeout)
                ) as r:
                    _check_scihub_response(r, test_json=False)
                    byte = r.raw.read()
            except Exception:
                byte = None
                time.sleep(random.uniform(0, 1))  # brief random back-off before retrying
            if byte:
                progress.update(len(byte))
                return i, byte

    def _download(self, url, path, session, file_size):
        self._file_size = file_size
        # Resuming interrupted downloads is not supported; the file is
        # always rebuilt from scratch.
        downloaded_bytes = 0
        chunk = 2 ** 20  # 1 MB base chunk size
        max_worker = 12
        if hasattr(self, "setting_chunk"):
            chunk *= int(self.setting_chunk)
        if hasattr(self, "max_worker"):
            max_worker = self.max_worker
        all_tasks = []
        # Number of ranged requests: at least one, and the last request
        # also covers any remainder of file_size % chunk.
        num = max(file_size // chunk, 1)
        with ThreadPoolExecutor(max_worker) as executor, closing(
            self._tqdm(desc="Downloading", total=self._file_size, unit="B", unit_scale=True, initial=0)
        ) as progress:
            for i in range(num):
                start = chunk * i
                # HTTP Range end offsets are inclusive, so the last byte is file_size - 1.
                end = file_size - 1 if i == num - 1 else start + chunk - 1
                headers = {"Range": "bytes={}-{}".format(start, end)}
                all_tasks.append(executor.submit(self._worker, url, self.session, headers, progress, i))
            # as_completed() blocks until every future has finished.
            byte_data = []
            for future in as_completed(all_tasks):
                i, data = future.result()
                byte_data.append({"index": i, "data": data})
            # Reassemble the chunks in their original order.
            byte_data.sort(key=itemgetter("index"))
            # No resume support, so always start a fresh file ("wb", not "ab").
            with open(path, "wb") as f:
                for d in byte_data:
                    f.write(d["data"])
                    downloaded_bytes += len(d["data"])
        # Return the number of bytes downloaded.
        return downloaded_bytes
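For completeness, a minimal end-to-end sketch using the standard sentinelsat query API. The GeoJSON file name and the date range are assumptions for illustration only:

from sentinelsat import geojson_to_wkt, read_geojson

api = SubSentinelAPI(user="****", password="*******")
api.max_worker = 12

# hypothetical search area and date range
footprint = geojson_to_wkt(read_geojson("search_area.geojson"))
products = api.query(footprint, date=("20210101", "20210225"), platformname="Sentinel-2")

for product_id in products:
    api.download(product_id)  # uses the overridden multi-threaded _download()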