day06

目录

一.if判断条件

1.单分支结构

# Single-branch demo: every `if` is evaluated independently, so more than
# one message can be printed for the same value.
a = 100
if a < 20:
    print("比20小")
if a > 80:
    print("比80大")
if a >= 100:
    # The condition includes equality, so the message has to say
    # "greater than or equal to 100"; a == 100 is not "greater than 100".
    print("大于等于100")

2.双分支结构

# Two-branch demo: exactly one of the two messages is printed.
a = 20
if a > 20:
    print("比20大")
else:
    # This branch also handles a == 20, so "less than 20" would be wrong;
    # report "less than or equal to 20" instead.
    print("小于等于20")

3.多分支结构

# Multi-branch demo: compare the entered age with the expected one and
# report whether the guess was too high, too low, or correct.
age1 = 20  # the value the user has to guess
age = int(input("请输入年龄:"))

# Pick the message first, then print it once.
if age == age1:
    verdict = "猜对了"
elif age < age1:
    verdict = "猜小了"
else:
    verdict = "猜大了"
print(verdict)

二.for循环结构

1. for+break结构

break:立即结束当前所在的这一层循环(外层循环不受影响)

2. for+continue结构

continue:跳过本次循环中剩余的语句,直接进入下一次循环

举例:循环打印1月1日~12月31日的每一天,需要区分闰年(2月有29天)以及30天和31天的月份
# -*- coding: utf-8 -*-
# @Time    : 2019/8/29 9:33
# @Author  : yqliu

def is_leap_year(year):
    """Return True when *year* is a leap year in the Gregorian calendar.

    A year is a leap year when it is divisible by 4, except for century
    years, which must also be divisible by 400 (1900 is not a leap year,
    2000 is).
    """
    return year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)


def days_in_month(year, month):
    """Return the number of days of *month* (1-12) in *year*."""
    if month == 2:
        return 29 if is_leap_year(year) else 28
    if month in (4, 6, 9, 11):
        return 30
    return 31


if __name__ == "__main__":
    year = int(input("请输入年份:"))

    # Report whether the year is a leap year ("rn") or not ("not_rn").
    flag = "rn" if is_leap_year(year) else "not_rn"
    print(flag)

    for i in range(1, 13):  # month loop
        last_day = days_in_month(year, i)
        for j in range(1, 32):  # day loop: 31 is the longest possible month
            if j > last_day:
                # for + break: leave only the inner (day) loop; the outer
                # loop then continues with the next month.
                break
            print(f'{i}月{j}日')

三.爬虫

爬虫三部曲

  1. 发送请求
  2. 解析数据
  3. 保存数据

爬虫精髓

  1. 分析网站的通信流程
  2. 分析查找数据从何而来
  3. 分析网站的反爬策略
  4. 根据网站的反爬策略编写攻击手段,获取数据
#爬取图片
# -*- coding: utf-8 -*-
# @Time    : 2019/8/29 14:37
# @Author  : yqliu

import requests

# Send the request. The timeout keeps the script from hanging forever on a
# stalled connection.
response = requests.get('https://timgsa.baidu.com/timg?image&quality=80&size=b9999_10000&sec=1567070792548&di=ad264a92813a13f06d9f0177fff0ad4b&imgtype=0&src=http%3A%2F%2Fimg1d.xgo-img.com.cn%2Fpics%2F1549%2F1548964.jpg', timeout=30)
# Fail loudly on an HTTP error instead of saving an error page as "1.jpg"
# and reporting success.
response.raise_for_status()

# Binary payloads (images, audio, video) are read through `.content` (bytes).
print(response.content)

with open('1.jpg', 'wb') as f:
    f.write(response.content)

# Reported only after the file has been written and closed.
print("图片下载成功")
#爬取视频
# -*- coding: utf-8 -*-
# @Time    : 2019/8/29 14:42
# @Author  : yqliu

import requests

# Send the request. The timeout keeps the script from hanging forever on a
# stalled connection.
response = requests.get('https://vd3.bdstatic.com/mda-jf3ubgm3ia8kje3g/sc/mda-jf3ubgm3ia8kje3g.mp4?auth_key=1567062456-0-0-da2498091628bf93752129e101e3b6fe&bcevod_channel=searchbox_feed&abtest=all', timeout=30)
# Fail loudly on an HTTP error (e.g. an expired auth_key) instead of saving
# an error page as "1.mp4" and reporting success.
response.raise_for_status()

# Binary payloads are read through `.content` (bytes).
print(response.content)

with open('1.mp4', 'wb') as f:
    f.write(response.content)

# Reported only after the file has been written and closed.
print("视频下载成功")
#爬取豆瓣电影top250
# -*- coding: utf-8 -*-
# @Time    : 2019/8/29 14:48
# @Author  : yqliu

import requests
import re
# Paging scheme of the Top 250 list (25 movies per page):
#   first page:  https://movie.douban.com/top250
#   second page: https://movie.douban.com/top250?start=25&filter=
#   last page:   https://movie.douban.com/top250?start=225&filter=

# Captures, per movie: detail-page URL, title, rating, number of ratings.
# re.S lets `.*?` span the line breaks between the tags.
# Compiled once here instead of once per downloaded page.
MOVIE_PATTERN = re.compile(
    '<div class="item">.*?<a href="(.*?)">.*?<span class="title">(.*?)</span>.*?<span class="rating_num" property="v:average">(.*?)</span>.*?<span>(.*?)人评价</span>',
    re.S,
)


def parse_movies(html):
    """Return a list of (url, name, score, count) string tuples found in *html*."""
    return MOVIE_PATTERN.findall(html)


def format_movie(movie):
    """Return one output line for *movie*: its fields tab-separated, newline-terminated.

    The separator keeps the URL, title, rating and vote count distinguishable
    when the file is read back.
    """
    return '\t'.join(movie) + '\n'


if __name__ == "__main__":
    with open('douban.txt', 'a', encoding='utf8') as f:
        # 10 pages, 25 movies each: start = 0, 25, ..., 225.
        for start in range(0, 250, 25):
            url = f'https://movie.douban.com/top250?start={start}&filter='
            print(url)
            response = requests.get(url=url, timeout=30)
            # Stop on an HTTP error instead of silently writing nothing for
            # this page.
            response.raise_for_status()
            print(response.text)
            f.writelines(format_movie(movie) for movie in parse_movies(response.text))

上一篇:MySQL--中间表语句练习


下一篇:Scrapy爬取当当网书籍