Worried that Renren might shut down one day, I wrote a crawler to download all the photos from my albums. The code is as follows:
# -*- coding: utf-8 -*-
import requests
import json
import os


def mkdir(path):
    # Create the target directory if it does not exist yet
    path = path.strip()
    path = path.rstrip("\\")
    isExists = os.path.exists(path)
    if not isExists:
        print path + ' created successfully'
        os.makedirs(path)
        return "yes"
    else:
        print path + ' already exists'
        return "no"


def login_renren(s):
    # Log in to Renren with the given requests session; the password and
    # rkey values are taken from a captured login request (packet capture)
    login_data = {
        'email': 'your username',
        'domain': 'renren.com',
        'origURL': 'http://www.renren.com/home',
        'key_id': '1',
        'captcha_type': 'web_login',
        'password': 'password value captured from a login request',
        'rkey': 'rkey value captured from a login request'
    }
    r = s.post("http://www.renren.com/ajaxLogin/login?1=1&uniqueTimestamp=2016742045262", data=login_data)
    if 'true' in r.content:
        print 'Logged in to Renren successfully'
    return s
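# --- Not in the original post: a small sketch of my own. ---
# The 'password' and 'rkey' fields above are values captured from a real
# login request, so rather than hard-coding them you could keep them in a
# local file (the name renren_login.json here is just an example you would
# create yourself) and build login_data from it:
def load_login_data(path='renren_login.json'):
    # json is already imported at the top of the script
    with open(path) as f:
        return json.load(f)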
def get_albums(s):
    # Fetch the album list page and pull the album ids out of the embedded
    # "nx.data.photo" JSON blob
    r = s.get('http://photo.renren.com/photo/278382090/albumlist/v7?showAll=1#')
    content = r.content
    index1 = content.find('nx.data.photo = ')
    index2 = content.find('nx.data.hasHiddenAlbum =')
    target_json = content[index1 + len('nx.data.photo = '):index2].strip()
    target_json = target_json[0:len(target_json) - 1]  # drop the trailing ";"
    data = json.loads(target_json.replace("\'", '"'))
    album_list = data['albumList']
    album_count = album_list['albumCount']
    print 'Found ' + str(album_count) + ' albums in total'
    album_ids = []
    for album in album_list['albumList']:
        album_ids.append(album['albumId'])
    return album_ids, s


def download_albums(album_ids, s):
    # Visit each album, parse its photo list and save every photo to disk
    for album_id in album_ids:
        album_url = 'http://photo.renren.com/photo/278382090/album-' + str(album_id) + '/v7'
        r = s.get(album_url)
        if "photoId" in r.content:
            print 'Entered album successfully'
        content = r.content
        index1 = content.find('nx.data.photo = ')
        index2 = content.find('; define.config')
        target_json = content[index1 + len('nx.data.photo = '):index2].strip()
        target_json = target_json[13:len(target_json) - 2]
        data = json.loads(target_json.replace("\'", '"'))
        photos = data['photoList']
        album_name = data['albumName']
        # Create one directory per album
        album_path = 'd:\\' + album_name
        if mkdir(album_path) == 'yes':
            for photo in photos:
                image_name = str(photo['photoId'])
                photo_url = photo['url']
                r = requests.get(photo_url)
                image_path = album_path + '/' + image_name + '.jpg'
                f = open(image_path, 'wb')
                f.write(r.content)
                f.close()
                print 'Photo ' + image_name + ' downloaded successfully'
        else:
            print 'Album already downloaded'


# Main routine of the script
if __name__ == '__main__':
    # Create a requests session so cookies persist across requests
    s = requests.Session()
    # Log in to Renren
    s = login_renren(s)
    # Get the list of album ids
    album_ids, s = get_albums(s)
    # Download every album
    download_albums(album_ids, s)
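A side note that is not part of the original script: the parsing above cuts the embedded "nx.data.photo = {...};" assignment out of the page HTML with find() and hard-coded offsets. Assuming the page still embeds the data in that form (and that no literal "};" occurs inside the blob), a regular expression can pull out the same JSON in one step, for example:

import json
import re

def extract_photo_data(html):
    # Grab the object assigned to nx.data.photo, up to the closing "};"
    match = re.search(r"nx\.data\.photo\s*=\s*(\{.*?\});", html, re.S)
    if match is None:
        return None
    # Same single-to-double quote fix-up as the original script
    return json.loads(match.group(1).replace("'", '"'))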
Done! Here is what it looks like when it runs: