Python爬虫——爬取旅游城市信息

import requests
from bs4 import BeautifulSoup
from pymongo import MongoClient

class QuNaEr():
    """Scrape one page of Qunar ticket search results into MongoDB.

    Each instance targets a single (keyword, page) pair of the
    ``piao.qunar.com`` ticket search listing.
    """

    # Search endpoint; the query string is passed through ``params`` so the
    # keyword is always percent-encoded correctly.
    SEARCH_URL = 'https://piao.qunar.com/ticket/list.htm'
    # Seconds to wait for the server, so a stalled connection cannot hang
    # the whole crawl.
    REQUEST_TIMEOUT = 10

    def __init__(self, keyword, page=1):
        """Remember what to search for.

        Args:
            keyword: Search keyword; it is also stored as the ``city`` field
                of every saved document.
            page: Result page number to request (defaults to 1).
        """
        self.keyword = keyword
        self.page = page

    def qne_spider(self):
        """Fetch the result page, parse every listed item and save them.

        Documents are written to the ``qunaer_51`` collection of the
        ``QuNaEr`` database on the MongoDB server at localhost:27017.
        Nothing is written when the page yields no items.

        Raises:
            requests.RequestException: If the request fails, times out or
                comes back with an HTTP error status.
        """
        response = requests.get(
            self.SEARCH_URL,
            params={
                'keyword': self.keyword,
                'region': '',
                'from': 'mpl_search_suggest',
                'page': self.page,
            },
            timeout=self.REQUEST_TIMEOUT,
        )
        # Fail loudly on 4xx/5xx instead of trying to parse an error page.
        response.raise_for_status()
        response.encoding = 'utf-8'

        documents = self._parse_sights(response.text)
        if not documents:
            # insert_many rejects an empty list, and there is nothing to save.
            return

        # One client per call (not per item), always closed afterwards so
        # connections are not leaked across the crawl.
        conn = MongoClient('localhost', port=27017)
        try:
            conn.QuNaEr.qunaer_51.insert_many(documents)
        finally:
            conn.close()

    def _parse_sights(self, html):
        """Return one document per child tag of the page's result list."""
        soup = BeautifulSoup(html, 'html.parser')
        result_list = soup.find('div', {'class': 'result_list'})
        if result_list is None:
            # The page carries no result list at all.
            return []

        documents = []
        # find_all(True, recursive=False) yields direct child *tags* only;
        # ``.contents`` would also yield text nodes, which carry no
        # attributes and cannot be searched like a tag.
        for item in result_list.find_all(True, recursive=False):
            price_span = item.find('span', {'class': 'sight_item_price'})
            price_em = price_span.find('em') if price_span is not None else None
            price_text = price_em.text if price_em is not None else None

            documents.append({
                'name': item.get('data-sight-name'),
                'address': item.get('data-address'),
                'count': self._to_number(item.get('data-sale-count'), int),
                'point': item.get('data-point'),
                'price': self._to_number(price_text, float),
                'city': self.keyword,
            })
        return documents

    @staticmethod
    def _to_number(raw, caster):
        """Convert *raw* with *caster*; return None if missing or malformed."""
        if raw is None:
            return None
        try:
            return caster(raw)
        except (TypeError, ValueError):
            return None

if __name__ == '__main__':
    # Cities to crawl; each one is passed to the spider as its search keyword.
    target_cities = (
        '北京', '上海', '成都', '三亚', '广州', '重庆', '深圳',
        '西安', '杭州', '厦门', '武汉', '大连', '苏州',
    )
    for city in target_cities:
        # Crawl result pages 1 through 4 for this city, one spider per page.
        for page_no in range(1, 5):
            QuNaEr(city, page=page_no).qne_spider()
上一篇:linux最基础最常用的命令快速手记 — 更实用的排查问题命令


下一篇:python百度关键词相关搜索采集,链轮查询采集相关关键词工具exe