"""Download every file listed under the 3GPP 38-series specification archive.

The archive root lists one directory per specification; each directory lists
the downloadable archives for that specification.  Files are fetched
concurrently, with at most ten downloads in flight at any time.
"""
import os
import threading
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Root listing of the 3GPP 38-series specification archive.
ARCHIVE_URL = 'https://www.3gpp.org/ftp/Specs/archive/38_series/'
# Directory the downloaded files are written to.
DOWNLOAD_DIR = './3gpp-TS38.xx/'
# Seconds to wait on the server before giving up; without a timeout a stalled
# connection would hold its worker thread (and its semaphore permit) forever.
REQUEST_TIMEOUT = 60

# Semaphore capping the number of concurrent download threads at 10.
# get_zip() acquires a permit before starting a worker; download_zip()
# releases it when the worker finishes.
thread_lock = threading.BoundedSemaphore(value=10)


def _list_links(url):
    """Return ``(absolute_url, link_text)`` for each link in the page's table body.

    Args:
        url: Address of a directory-listing page.

    Returns:
        A list of ``(href, text)`` tuples, one per ``<a href=...>`` found
        inside the first ``<tbody>``.  Empty when the page has no ``<tbody>``.

    Raises:
        requests.RequestException: If the request fails or returns an HTTP
            error status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    html = response.content.decode('utf-8')
    tbody = BeautifulSoup(html, 'html.parser').find('tbody')
    if tbody is None:
        return []
    # urljoin leaves absolute hrefs untouched and resolves relative ones
    # against the page they were found on.
    return [
        (urljoin(url, anchor['href']), anchor.text)
        for anchor in tbody.find_all('a', href=True)
    ]


def get_3gppurl():
    """Return the URLs linked from the archive root listing.

    Returns:
        A list of URL strings, in page order.

    Raises:
        requests.RequestException: If the root listing cannot be fetched.
    """
    return [link for link, _ in _list_links(ARCHIVE_URL)]


def download_zip(u, t):
    """Download ``u`` into the download directory under the file name ``t``.

    Intended to run as a worker thread started by get_zip(): it always
    releases one ``thread_lock`` permit on exit, whether or not the download
    succeeded, so failed downloads cannot exhaust the semaphore.

    Args:
        u: URL of the file to download.
        t: File name to save it as (the link text from the listing page).
    """
    try:
        res = requests.get(u, timeout=REQUEST_TIMEOUT)
        # Do not save an HTTP error page as if it were the requested file.
        res.raise_for_status()
        os.makedirs(DOWNLOAD_DIR, exist_ok=True)
        path = os.path.join(DOWNLOAD_DIR, str(t).strip())
        with open(path, 'wb') as file:
            file.write(res.content)
    except (requests.RequestException, OSError) as err:
        # Best effort: report the failure and let the other downloads go on.
        print('download failed:', u, err)
    finally:
        # Unlock: hand the permit back so get_zip() can start another worker.
        thread_lock.release()


def get_zip(url1):
    """Start a download thread for every file linked from the listing ``url1``.

    Blocks whenever ten downloads are already running.  Returns once all
    workers have been started; it does not wait for them to finish.

    Args:
        url1: URL of a specification directory listing.

    Raises:
        requests.RequestException: If the listing cannot be fetched.
    """
    for u, name in _list_links(url1):
        print(u, name)
        # Lock: wait for a free download slot before starting the worker.
        thread_lock.acquire()
        try:
            worker = threading.Thread(target=download_zip, args=(u, name))
            worker.start()
        except Exception:
            # The worker never ran, so it cannot release the permit itself.
            thread_lock.release()
            raise


if __name__ == '__main__':
    urllist = get_3gppurl()
    for url1 in urllist:
        print(url1)
        try:
            get_zip(url1)
        except requests.RequestException as err:
            # One unreachable directory should not abort the whole run.
            print('listing failed:', url1, err)
# 打印结果
# 下载结果