soupR.py 代码如下:
# -*- coding: cp936 -*-
import urllib
import urllib2
import os, re
from BeautifulSoup import BeautifulSoup

# Directory the downloaded packages are written to.
localDir = 'D:/test/'
# Index page whose links are scanned for packages to download.
URL = "http://cran.dataguru.cn/bin/windows/contrib/3.1/"

# Fetch and parse the index page; release the connection even if parsing fails.
page = urllib2.urlopen(URL)
try:
    soup = BeautifulSoup(page)
finally:
    page.close()

# Download every linked .zip file whose link text starts with 'map'.
for a in soup.findAll('a'):
    text = a.text.strip()
    if not text.startswith('map'):
        continue
    # get() may return None for an anchor without an href attribute;
    # skip such anchors instead of crashing on None.endswith().
    href = a.get('href')
    if not href or not href.endswith('.zip'):
        continue
    # The link text doubles as the local file name.
    localFile = localDir + text
    if os.path.exists(localFile):
        print('%s is exists' % localFile)
        continue
    try:
        # Download the file from the URL and save it to the local directory.
        urllib.urlretrieve(URL + href, localFile)
    except Exception as e:
        # Best effort: report the failure and carry on with the next link.
        print('%s download failed: %s' % (href, e))
        # Remove any partial file so that a later run does not mistake it
        # for a completed download and skip it.
        if os.path.exists(localFile):
            os.remove(localFile)
    else:
        print('%s is download' % href)