OSS restful API 调用 get，遍历目录中的文件，python发http request示例

2022-01-09 04:43:42
发送get 请求，遍历目录下的所有文件，代码中*** 的部分改成实际内容，这个API说明文档在bucket操作里面。
REST 请求最容易出错的地方是拼接 header 时的 Authorization：计算签名时的入参必须与实际发送的 header 内容完全一致。
#tested env: python version v3.9.6
#author: Fred
#2022-1-11

import hmac
import hashlib
import base64
import datetime
import requests
from scrapy.utils.python import to_bytes

def get_md5(content):
    """Return the Base64-encoded MD5 digest of *content* (the Content-MD5 value).

    Args:
        content: The request body itself (not a file path), either as ``str``
            (encoded as UTF-8 before hashing) or as ``bytes``.

    Returns:
        The Base64 text of the raw 128-bit MD5 digest, as a ``str``.

    Reference:
        https://www.alibabacloud.com/help/doc-detail/31951.html#section-i74-k35-5w4
    """
    # hashlib only accepts bytes-like input, so text is encoded first.
    if isinstance(content, str):
        content = content.encode('utf-8')
    digest = hashlib.md5(content).digest()
    # The header carries the Base64 of the raw digest, not the hex string.
    return base64.b64encode(digest).decode('utf-8')

def get_sig(verb, content_md5, content_type, date, add_info_str, res, secret=None):
    """Compute the Base64 HMAC-SHA1 signature for the Authorization header.

    The signed values must match the values actually sent in the HTTP request.

    Args:
        verb: HTTP method, e.g. ``'GET'``.
        content_md5: Value of the Content-MD5 header (may be empty).
        content_type: Value of the Content-Type header (may be empty).
        date: Value of the Date header.
        add_info_str: CanonicalizedOSSHeaders as ``name:value`` entries
            separated by ``'\\n'``; pass ``''`` when there are none.
        res: CanonicalizedResource, e.g. ``'/bucket/'``.
        secret: AccessKey Secret as ``str`` or ``bytes``. When omitted, the
            module-level ``ak_secret`` is used.

    Returns:
        The Base64-encoded HMAC-SHA1 of the string-to-sign, as a ``str``.

    Reference:
        https://www.alibabacloud.com/help/en/doc-detail/31951.html
    """
    if secret is None:
        secret = ak_secret

    # Every canonicalized header entry is terminated by exactly one '\n';
    # when there are no such headers nothing (not even '\n') is inserted
    # before the resource.
    oss_headers = add_info_str.rstrip('\n')
    if oss_headers:
        oss_headers += '\n'

    string_to_sign = '\n'.join(
        (verb, content_md5, content_type, date, oss_headers + res)
    )

    key = secret if isinstance(secret, bytes) else secret.encode('utf-8')
    mac = hmac.new(key, string_to_sign.encode('utf-8'), hashlib.sha1)
    return base64.b64encode(mac.digest()).decode('utf-8')

# Credentials and endpoint provided by the cloud account owner; replace the
# placeholder values with real ones.
# ak_id is the AccessKey ID.
ak_id = '****'
# ak_secret is the AccessKey Secret.
ak_secret = '****'
# host_addr is the bucket host name (no scheme).
host_addr = '****.***.aliyuncs.com'

# Values used to fill the HTTP request headers; the same values are signed.
verb = 'GET'
# A GET request has no body, so the MD5 is taken over empty content.
content_md5 = get_md5('')
content_type = 'text/html'
# Date header in GMT. A timezone-aware UTC timestamp is used because
# datetime.utcnow() is deprecated and returns a naive datetime.
# NOTE(review): %a and %b are locale-dependent; this assumes an English/C locale.
date = datetime.datetime.now(datetime.timezone.utc).strftime('%a, %d %b %Y %H:%M:%S GMT')
# CanonicalizedResource: destination bucket name and folder.
res = '/****/'
# CanonicalizedOSSHeaders are optional; multiple entries must be separated by '\n'.
# https://www.alibabacloud.com/help/doc-detail/31951.html#section-rvv-dx2-xdb
author_info_key = 'x-oss-meta-author'
author_info_value = '***'
add_info_str = author_info_key + ':' + author_info_value

# Authorization header value: "OSS <AccessKey ID>:<signature>". The request is
# rejected unless the signature matches the headers that are actually sent.
auth = "OSS " + ak_id + ":" + get_sig(verb, content_md5, content_type, date, add_info_str, res)


# HTTP headers: the authentication value plus the exact values that were signed.
req_header = {
    'Host': host_addr,
    'Content-Md5': content_md5,
    'Content-Type': content_type,
    'Date': date,
    'Authorization': auth,
    author_info_key: author_info_value,
}

# Listing options for ListObjectsV2. These are query-string parameters, not
# HTTP headers; sent as headers they would not be applied to the listing.
# https://www.alibabacloud.com/help/en/doc-detail/187544.html
# NOTE(review): assumes these parameters are not part of the signed
# CanonicalizedResource -- confirm against the signature documentation.
req_params = {
    # list-type selects the API version and must be 2.
    'list-type': '2',
    # With delimiter '/', only sub-folder names are returned instead of the
    # files inside the sub-folders.
    'delimiter': '/',
    # prefix is the folder name; note the trailing '/'.
    'prefix': '***/',
}

# The request URL is the bucket address.
req_url = 'https://' + host_addr
# A timeout keeps the script from hanging forever on an unresponsive endpoint.
# `req` holds the response object returned by requests.get.
req = requests.get(req_url, headers=req_header, params=req_params, timeout=30)

# Show the response; the status should be 200.
print(req.status_code)
print(req.headers)

# The body is in XML format,
# refer to: https://www.alibabacloud.com/help/en/doc-detail/187544.html
print(req.content)
码农公寓

相关文章