1. 数据库笔记
建立数据库mariadb_learn,并且在userinfor表里面写入20个用户的账号和密码信息。代码如下
import pymysql

# Generate 20 user records (username, password) as a list of tuples,
# one tuple per row to insert: ('user1', '00000') ... ('user20', '00000').
list_infor = [('user' + str(num), '00000') for num in range(1, 21)]

dbconfig = {
    'host': 'localhost',
    'user': 'root',
    'passwd': '123456',
    'db': 'mariadb_learn',
}

# 1. Open the database connection.
conn = pymysql.connect(**dbconfig)
# 2. Create a cursor, used to send SQL statements to the server.
cur = conn.cursor()
try:
    # Parameterized statement: pymysql escapes every value, so the data
    # cannot inject SQL. executemany() runs it once per tuple in list_infor.
    insert_sqli = 'insert into userinfor values(%s,%s)'
    cur.executemany(insert_sqli, list_infor)
except Exception as error:
    print('sql executemany failed:', error)
else:
    print('sql executemany success!')
# 4. Commit so the inserts actually take effect in the database.
conn.commit()
# 5. Close the cursor first ...
cur.close()
# 6. ... then close the connection.
conn.close()
输出
sql executemany success!
再次使用executemany()函数,将第5到第15条用户信息的密码更改为123456。
import pymysql

dbconfig = {
    'host': 'localhost',
    'user': 'root',
    'passwd': '123456',
    'db': 'mariadb_learn',
}

# Regenerate the same 20 users that the insert script created.
list_infor = [('user' + str(num), '00000') for num in range(1, 21)]
# Users 5..15 (list indices 4..14). The new password is parameterized
# together with the username — executemany() expects one tuple per row,
# and per the note above the new password is 123456 (the original code
# hard-coded "13324" in the SQL string, contradicting the text).
update_infor = [('123456', userinfor[0]) for userinfor in list_infor[4:15]]

# 1. Open the database connection.
conn = pymysql.connect(**dbconfig)
# 2. Create a cursor, used to send SQL statements to the server.
cur = conn.cursor()
# 3. Execute the statement once per (passwd, username) tuple.
try:
    update_sqli = 'update userinfor set passwd = %s where username = %s'
    cur.executemany(update_sqli, update_infor)
except Exception as error:
    print('sql update failed', error)
else:
    print('sql update success')
# 4. Commit so the updates actually take effect in the database.
conn.commit()
# 5. Close the cursor first ...
cur.close()
# 6. ... then close the connection.
conn.close()
输出:
sql update success
2. 利用爬虫爬取最好大学网上对于2018年全国各大高校的各项指标的排名及综合状况:
import requests, csv
from bs4 import BeautifulSoup

# Fetch the "Best Chinese Universities" ranking page and parse its table.
res_univer = requests.get('http://www.zuihaodaxue.com/zuihaodaxuepaiming2017.html')
res_univer.encoding = 'utf-8'
bs_univer = BeautifulSoup(res_univer.text, 'html.parser')
# Skip the header <tr>; keep the first 500 data rows.
list_all_univer = bs_univer.find_all('tr')[1:501]

count = 0
# newline='' is required by the csv module; without it every row is
# followed by a blank line on Windows.
with open('university.csv', 'w', encoding='gbk', newline='') as file_1:
    writer = csv.writer(file_1)
    writer.writerow(['排名', '学校名称', '省市', '总分', '生源质量', '培养结果',
                     '科研规模', '科研质量', '顶尖成果', '顶尖人才', '科技服务',
                     '成果转化', '学生国际化'])
    for list_a_univer in list_all_univer:
        # The rank is our own row counter; cells 1-12 hold the other fields
        # (cell 0 duplicates the rank, so it is skipped).
        count += 1
        data = list_a_univer.find_all('td')
        writer.writerow([count] + [data[x].text for x in range(1, 13)])
这是爬取了其中前500名高等院校的各项指标的情况,最终我们将数据导入一个CSV文件中(university.csv):
查询“广东技术师范学院”的排名和得分信息,我们有下面的代码:
import requests from bs4 import BeautifulSoup allUniv=[] def getHTMLText(url): try: r=requests.get(url,timeout=30) r.raise_for_status() r.encoding = ‘utf-8‘ return r.text except: return "" def fillUnivList(soup): data = soup.find_all(‘tr‘) for tr in data: ltd = tr.find_all(‘td‘) if len(ltd)==0: continue singleUniv = [] for td in ltd: singleUniv.append(td.string) allUniv.append(singleUniv) def printUnivList(num): a="广东技术师范学院" print("{1:^4}{2:{0}^8}{3:{0}^6}{4:{0}^6}{5:{0}^8}".format((chr(12288)),"排名","学校名称","省市","总分","培训规模")) for i in range(num): u=allUniv[i] if a in u: print("{1:^4}{2:{0}^10}{3:{0}^5}{4:{0}^8.1f}{5:{0}^10}".format((chr(12288)),u[0],u[1],u[2],eval(u[3]),u[6])) def main(num): url=‘http://www.zuihaodaxue.cn/zuihaodaxuepaiming2018.html‘ html = getHTMLText(url) soup = BeautifulSoup(html,"html.parser") fillUnivList(soup) printUnivList(num) main(600)
于是我们在海量的信息中就能够找到我们学校的排名得分啦~
进一步地,我们调查一下广东省的高校的排名和得分情况,顺便对其中一项指标进行比较。只要将上述代码中变量a的值改为“广东”,即可筛选出所有广东省内的院校,同时改变爬虫爬取的内容:
def printUnivList(num): a="广东技术师范学院" print("{1:^4}{2:{0}^8}{3:{0}^6}{4:{0}^6}{5:{0}^8}".format((chr(12288)),"排名","学校名称","省市","总分","培训规模")) for i in range(num): u=allUniv[i] if a in u: print("{1:^4}{2:{0}^10}{3:{0}^5}{4:{0}^8.1f}{5:{0}^10}".format((chr(12288)),u[0],u[1],u[2],eval(u[3]),u[6]))
于是有下述结果: