requests的基本使用

xpath-xml路径语言.py

# XPath demo.
# XPath is a query language for locating information inside XML documents.
# To use it from Python, import etree from lxml.
from lxml import etree

# Approach 1: parse an HTML document held in a string.
# NOTE: the <a> tags are left unclosed as in the original sample; the
# queries below rely on the HTML parser recovering from that.
# The class attribute on the first <ul> had a missing closing quote, which
# made the attribute value run on into the first <li>; that is fixed here.
text = '''
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8"/>
<title>我的学习</title>
</head>
<body>
    <ul class="menu">
        <li><a href = "/a/b/c/java/">java工程师</li>
        <li><a href = "/a/b/c/c/">c工程师</li>
        <li><a href = "/a/b/c/python/">python工程师</li>
        <li><a href = "/a/b/c/ai/">ai工程师</li>
    </ul>
    <div class = "teacher">
        <ul>
            <li><a href = "/a/b/c/java/">j工程师</li>
            <li><a href = "/a/b/c/c/">c工程师</li>
            <li><a href = "/a/b/c/python/">p工程师</li>
            <li><a href = "/a/b/c/ai/">a工程师</li>
        </ul>
    </div>
</body>
</html>
'''

# Parse the HTML string into an element tree with etree.HTML.
html = etree.HTML(text)

print(html)

# Other example queries (kept disabled, as in the original):
# Extract the link text of the first <li> in the top-level <ul>.
# r = html.xpath('/html/body/ul/li[1]/a/text()')
# print(r)
# Extract the link text of every <li> in the document.
# rr = html.xpath('//li/a/text()')
# print(rr)

# Link text of the <li> items that sit under <div class="teacher">.
r = html.xpath('//div[@class="teacher"]//li/a/text()')
print(r)

# href attribute of those same links.
h = html.xpath('//div[@class="teacher"]//li/a/@href')
print(h)

# Pair each link text with its href and print the resulting tuples.
print(*zip(r, h))




# #方法二:读取一个html文件并解析
#
# html = etree.parse('./testt.html',etree.HTMLParser())
#
# print(html)
# result = etree.tostring(html)
# print(result)
# print(result.decode('utf-8'))
# r = html.xpath('/html/body/ul/li/a/text()')
# print(r)

requests的基本使用.py

# -*- coding=utf-8 -*-
import requests

# URL to request; the commented-out lines are alternative targets.

#url = 'https://www.baidu.com/'

#url = 'http://www.xicidaili.com/nn/'
url = 'https://b.faloo.com/'

# Browser-like User-Agent header sent with the request.
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36"
}

# Send the request.
# The original built `header` but never passed it, so the User-Agent was
# not actually sent; it is passed here. A timeout is set because requests
# otherwise waits indefinitely on an unresponsive server.
res = requests.get(url=url, headers=header, timeout=10)

# Inspecting the response:
# print(res)
# print(res.content)  # b'.......' raw bytes of the body
print(res.encoding)
# Override the encoding requests uses to decode res.text.
# NOTE(review): presumably this site serves GBK-encoded pages - confirm.
res.encoding = 'gbk'
print(res.encoding)
print(res.text)
# HTTP status code of the response.
code = res.status_code
print(code)

# On success, save the response body to a file.
# The raw bytes are written in binary mode so the saved page keeps the
# server's own encoding; writing res.text in text mode depended on the
# platform's default encoding and could raise UnicodeEncodeError.
if code == 200:
    with open("D:\\zhangt\\pystudy\\pywenjian\\pythonProject\\crawweb\\test.html", "wb") as fp:
        fp.write(res.content)


上一篇:利用requests以及etree爬取京东首页30部手机信息并分别存储csv文件以及excel文件


下一篇:Python爬虫XPath解析后保存CSV文件乱码的问题