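# Send a GET request to list all files under a directory. Replace the parts
# marked *** in the code with real values. The API reference for this call is
# in the "bucket operations" section of the documentation.
# The main pitfall of the REST request is the Authorization header built for
# the request: pay attention to the inputs used when computing the signature.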
#tested env: python version v3.9.6
#author: Fred
#2022-1-11
import hmac
import hashlib
import base64
import datetime
import requests
from scrapy.utils.python import to_bytes
# Compute the Content-MD5 value for the body of an HTTP request.
# Reference: https://www.alibabacloud.com/help/doc-detail/31951.html#section-i74-k35-5w4
def get_md5(content):
    """Return the base64-encoded MD5 digest of *content*.

    Args:
        content: The request body itself (not a path to a file), either as
            ``str`` (encoded as UTF-8 before hashing) or as ``bytes``.

    Returns:
        The raw MD5 digest of the body, base64-encoded, as a ``str``.

    Raises:
        TypeError: If *content* is neither ``str`` nor ``bytes``.
    """
    if isinstance(content, str):
        payload = content.encode('utf-8')
    elif isinstance(content, bytes):
        payload = content
    else:
        raise TypeError(
            'get_md5 expects a str or bytes object, got '
            + type(content).__name__
        )
    # The header carries the base64 of the raw digest bytes, not the hex form.
    return base64.b64encode(hashlib.md5(payload).digest()).decode('utf-8')
# Compute the signature that forms part of the Authorization header. The
# values passed in must be the same as those sent in the HTTP request headers.
# Reference: https://www.alibabacloud.com/help/en/doc-detail/31951.html
def get_sig(verb, content_md5, content_type, date, add_info_str, res, secret=None):
    """Return the base64-encoded HMAC-SHA1 signature of a request.

    The string that gets signed is::

        verb \n content_md5 \n content_type \n date \n <oss headers> res

    Args:
        verb: HTTP method, e.g. ``'GET'``.
        content_md5: Value of the Content-MD5 header (may be empty).
        content_type: Value of the Content-Type header (may be empty).
        date: Value of the Date header.
        add_info_str: CanonicalizedOSSHeaders, i.e. ``name:value`` lines
            separated by ``'\n'``. May be empty. A trailing ``'\n'`` is
            optional; exactly one is used either way.
        res: CanonicalizedResource.
        secret: Signing key as ``str`` or ``bytes``. When omitted, the
            module-level ``ak_secret`` is used.

    Returns:
        The signature as a base64 ``str``.
    """
    # Every header line must be terminated by a single '\n', and an empty
    # header block must contribute nothing (not a stray '\n') before `res`.
    oss_headers = add_info_str
    if oss_headers and not oss_headers.endswith('\n'):
        oss_headers += '\n'
    string_to_sign = '\n'.join(
        (verb, content_md5, content_type, date, oss_headers + res)
    )

    key = ak_secret if secret is None else secret
    if isinstance(key, str):
        key = key.encode('utf-8')

    mac = hmac.new(key, string_to_sign.encode('utf-8'), hashlib.sha1)
    return base64.b64encode(mac.digest()).decode('utf-8')
# Information provided by the cloud account owner to access the cloud
# resources. Replace the placeholder values before running.
# ak_id is the AccessKey ID
ak_id = '****'
# ak_secret is the AccessKey Secret (get_sig reads this module-level name)
ak_secret = '****'
# host_addr is the host name of the bucket endpoint
host_addr = '****.***.aliyuncs.com'

# Values that are both signed and sent as HTTP request headers
verb = 'GET'
content_md5 = get_md5('')
content_type = 'text/html'
# Current GMT time; timezone-aware replacement for the deprecated utcnow(),
# the formatted string is the same.
date = datetime.datetime.now(datetime.timezone.utc).strftime('%a, %d %b %Y %H:%M:%S GMT')
# CanonicalizedResource: destination bucket name and folder
res = '/****/'
# CanonicalizedOSSHeaders: optional; multiple entries must be separated by \n
# https://www.alibabacloud.com/help/doc-detail/31951.html#section-rvv-dx2-xdb
author_info_key = 'x-oss-meta-author'
author_info_value = '***'
add_info_str = author_info_key + ':' + author_info_value

# Build the Authorization value so the request is permitted; it is a
# mandatory part of the HTTP header.
auth = "OSS " + ak_id + ":" + get_sig(verb, content_md5, content_type, date, add_info_str, res)

# Request headers: the authorization plus exactly the values that were signed
req_header = {
    'Host': host_addr,
    'Content-Md5': content_md5,
    'Content-Type': content_type,
    'Date': date,
    'Authorization': auth,
    author_info_key: author_info_value,
}

# Listing options for querying the files in a folder. These are request
# (query-string) parameters of the list API, not HTTP headers, so they are
# passed via `params`.
# https://www.alibabacloud.com/help/en/doc-detail/187544.html
# NOTE(review): assumed not to be part of the signed CanonicalizedResource,
# so the signature above is unchanged -- confirm against the signing docs.
req_params = {
    # list-type: the version of the list API, must be 2
    'list-type': '2',
    # With delimiter '/', only sub-folder names are returned and the files
    # inside sub-folders are not listed; otherwise those files are listed too.
    'delimiter': '/',
    # prefix is the folder name; note the trailing /
    'prefix': '***/',
}

# The request URL is the bucket address
req_url = 'https://' + host_addr
# A timeout keeps the script from hanging forever on an unresponsive endpoint
req = requests.get(req_url, headers=req_header, params=req_params, timeout=30)

# Show the response; the status should be 200
print(req.status_code)
print(req.headers)
# The content is in XML format,
# refer to: https://www.alibabacloud.com/help/en/doc-detail/187544.html
print(req.content)