有关ajax的请求
get请求
下载豆瓣电影的第一页电影数据,一页有二十个。
import urllib.request

# AJAX GET request: download the first page (20 movies) of Douban's
# top-rated movie chart as JSON.
url = 'https://movie.douban.com/j/chart/top_list?type=25&interval_id=100%3A90&action=&start=0&limit=20'
headers = {
    # Spoof a browser User-Agent so the server does not reject the request.
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36'
}
# 1. Build the request object with custom headers.
request = urllib.request.Request(url=url, headers=headers)
# 2. Fetch and decode the response body.
res = urllib.request.urlopen(request)
content = res.read().decode('utf-8')
# 3. Save the data locally. open() defaults to the platform encoding
#    (gbk on Chinese Windows), so pass encoding='utf-8' explicitly to keep
#    Chinese characters intact. The `with` statement guarantees the file is
#    closed even if the write raises (the original open/write/close pattern
#    leaked the handle on error).
with open('douban.json', 'w', encoding='utf-8') as fp:
    fp.write(content)
下载豆瓣电影的前十页电影数据
'''
以下三个分别是第一页,第二页,第三页的Request URL
https://movie.douban.com/j/chart/top_list?type=25&interval_id=100%3A90&action=&start=0&limit=20
https://movie.douban.com/j/chart/top_list?type=25&interval_id=100%3A90&action=&start=20&limit=20
https://movie.douban.com/j/chart/top_list?type=25&interval_id=100%3A90&action=&start=40&limit=20
'''
import urllib.request
import urllib.parse
# Build the request object for one page of the Douban chart.
def createRequest(page):
    """Return a urllib Request for the given 1-based page (20 items per page)."""
    base_url = 'https://movie.douban.com/j/chart/top_list?type=25&interval_id=100%3A90&action=&'
    # Pagination is expressed as a 0-based offset: page 1 -> start=0, page 2 -> start=20, ...
    params = urllib.parse.urlencode({'start': (page - 1) * 20, 'limit': 20})
    headers = {
        # Browser-like User-Agent so the endpoint serves the AJAX response.
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36'
    }
    return urllib.request.Request(url=base_url + params, headers=headers)
# Fetch the response data.
def getContent(request):
    """Open *request* and return the response body decoded as UTF-8.

    Uses a context manager so the HTTP response is always closed; the
    original version never called close() and leaked the connection.
    """
    with urllib.request.urlopen(request) as res:
        return res.read().decode('utf-8')
# Persist one page of results to a local JSON file.
def Download(content, page):
    """Write *content* to douban<page>.json, UTF-8 encoded."""
    filename = 'douban{}.json'.format(page)
    with open(filename, 'w', encoding='utf-8') as out:
        out.write(content)
if __name__ == '__main__':
    # Crawl pages 1..10 of the chart; each page is saved to its own file.
    first_page, last_page = 1, 10
    for current in range(first_page, last_page + 1):
        Download(getContent(createRequest(current)), current)
post请求
进入肯德基官网,点下角的餐厅查询,查看店铺位置信息。代码主要爬取在北京的肯德基店铺位置。
import urllib.request
import urllib.parse
def createRequest(page):
    """Build the POST request for one page of KFC's Beijing store list.

    The endpoint is queried via AJAX; the form fields travel in the
    request body, which is what makes this a POST request.
    """
    base_url = 'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=cname'
    form = {
        'cname': '北京',
        'pid': '',
        'pageIndex': page,
        'pageSize': '10',
    }
    # A POST body must be bytes, hence the extra encode step after urlencode.
    payload = urllib.parse.urlencode(form).encode('utf-8')
    headers = {
        # Browser-like User-Agent so the endpoint serves the AJAX response.
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36'
    }
    return urllib.request.Request(url=base_url, data=payload, headers=headers)
def getContent(request):
    """Open *request* and return the response body decoded as UTF-8.

    Uses a context manager so the HTTP response is always closed; the
    original version never called close() and leaked the connection.
    """
    with urllib.request.urlopen(request) as res:
        return res.read().decode('utf-8')
def Download(content, page):
    """Save one page of store data to kfc_<page>.json, UTF-8 encoded."""
    path = 'kfc_{}.json'.format(page)
    with open(path, mode='w', encoding='utf-8') as handle:
        handle.write(content)
if __name__ == '__main__':
    # Fetch pages 1..10 of Beijing KFC stores; one output file per page.
    first_page, last_page = 1, 10
    for current in range(first_page, last_page + 1):
        Download(getContent(createRequest(current)), current)
'''
第一页
http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=cname
cname: 北京
pid:
pageIndex: 1
pageSize: 10
第二页
http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=cname
cname: 北京
pid:
pageIndex: 2
pageSize: 10
'''