需要:MySQLdb
下面是数据表结构:
/*
Navicat MySQL Data Transfer

Source Server         : 127.0.0.1
Source Server Version : 50509
Source Host           : 127.0.0.1:3306
Source Database       : wooyun

Target Server Type    : MYSQL
Target Server Version : 50509
File Encoding         : 65001

Date: 2015-09-24 17:38:14
*/

SET FOREIGN_KEY_CHECKS=0;

-- ----------------------------
-- Table structure for wooyun_vul
-- ----------------------------
-- One row per scraped corp: its name, its external URL and a numeric count.
-- The table uses utf8 because the loader script connects with
-- SET NAMES 'utf8'; with a latin1 table any character outside Latin-1
-- would be replaced by '?' when the row is stored.
DROP TABLE IF EXISTS `wooyun_vul`;
CREATE TABLE `wooyun_vul` (
  `id` int(8) NOT NULL AUTO_INCREMENT,
  `corpsname` varchar(255) DEFAULT NULL,
  `corpsurl` varchar(255) DEFAULT NULL,
  -- int(11) is the conventional display width; the stored range is unchanged.
  `vulcount` int(11) DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
Python 脚本:
# coding=utf-8
import urllib2
import urllib
import re
import MySQLdb

# Base address of the paginated corps listing; the page number is appended to it.
url = "http://wooyun.org/corps/page/"
def _fetch(url):
    """Download ``url`` with a browser User-Agent header and return the body.

    The response object is closed before returning, including when
    ``read()`` raises, so the underlying socket is not leaked.
    """
    request = urllib2.Request(url)
    request.add_header('User-Agent','Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36')
    response = urllib2.urlopen(request)
    try:
        return response.read()
    finally:
        response.close()


def getWooyuncorps(url):
    """Scrape one listing page for corp identifiers and external links.

    Args:
        url: Address of the listing page to download.

    Returns:
        A tuple ``(corps, corpsUrl)`` of two lists of strings: the path
        segments captured after ``/corps/`` in the table-cell links, and
        the ``href`` values of the ``rel="nofollow"`` links. The two
        lists are matched independently, so equal length is not guaranteed.
    """
    content = _fetch(url)
    pattern1 = re.compile(r'<td width="370"><a href="\/corps\/(.*?)">.*?<\/a><\/td>')
    pattern2 = re.compile(r'<a rel="nofollow" href="(.*?)" target=')
    corps = pattern1.findall(content)
    corpsUrl = pattern2.findall(content)
    return corps, corpsUrl


def getcorpscount(url):
    """Scrape a corp page for the number shown in its pager paragraph.

    Args:
        url: Address of the corp page to download.

    Returns:
        A list of digit strings: the first run of digits following each
        ``<p class="page">`` occurrence. The list is empty when the page
        contains no such paragraph.
    """
    content = _fetch(url)
    pattern = re.compile(r'<p class="page">.*?(\d+).*')
    count = pattern.findall(content)
    return count


corpslist = []
corpsurllist = []
countlist = []

# Walk listing pages 1..36 and accumulate every corp identifier and link.
for i in range(1, 37):
    corps, corpsUrl = getWooyuncorps(url + str(i))
    corpslist.extend(corps)
    corpsurllist.extend(corpsUrl)
print("%d %d" % (len(corpslist), len(corpsurllist)))

# Fetch the count for each corp. Exactly one entry is stored per corp so that
# countlist stays index-aligned with corpslist: the first match is used, and
# None (stored as NULL) marks a page where no count was found. Appending every
# match instead would shift all later counts onto the wrong corps.
for corp in corpslist:
    newurl = "http://www.wooyun.org/corps/" + urllib.quote(corp)
    #print newurl
    count = getcorpscount(newurl)
    #print count
    countlist.append(int(count[0]) if count else None)

#print len(countlist)
conn = MySQLdb.connect('localhost', 'root', '', 'wooyun')
try:
    cur = conn.cursor()
    cur.execute("set names 'utf8'")
    conn.commit()

    # Values are passed as parameters and escaped by the driver; scraped text
    # must never be formatted into the statement itself (a single quote in a
    # name would break the query or inject SQL).
    sql = 'insert into wooyun_vul(corpsname,corpsurl,vulcount) values(%s,%s,%s)'
    # zip() stops at the shortest list, so a length mismatch between the
    # scraped lists cannot raise IndexError.
    for row in zip(corpslist, corpsurllist, countlist):
        print("%s %r" % (sql, row))
        cur.execute(sql, row)
        conn.commit()
finally:
    # Release the connection even when a request or an insert fails.
    conn.close()
print("success")