百度统计API接口Python简易SDK

由于太久没有动笔,所以决定随便发点东西。最近在接入百度统计API,虽然没有对响应结果做进一步处理,但还是希望能够减少大家的工作量。

百度商业账号接口未对接,百度普通账号所有接口已接入。本SDK基于第三方库httpx,所有方法都是异步调用的。
如果想要快速改成同步调用,只需替换如下几个关键字:

  1. "AsyncClient" -> "Client"
  2. "async " -> ""
  3. "await " -> ""
  4. "asyncio.run(main())" -> "main()"


from httpx import AsyncClient, Response


class BaiduConfig:
    """Credentials used by the Baidu Tongji client defined in this file.

    The values below are placeholders and must be replaced with real ones.
    ``ACCESS_TOKEN`` and ``REFRESH_TOKEN`` are read on every request and are
    overwritten in place when the client refreshes the access token.
    """

    # Authorization docs:
    # https://tongji.baidu.com/api/manual/Chapter2/openapi.html
    CLIENT_ID: str = '自行申请'
    CLIENT_SECRET: str = '自行申请'
    REFRESH_TOKEN: str = '见百度授权文档'
    ACCESS_TOKEN: str = '见百度授权文档'


class BaiduStat(AsyncClient):
    """Asynchronous client for the Baidu Tongji report API.

    Every public ``get_*`` report method sends one GET request to the
    ``report/getData`` endpoint and returns the decoded JSON body unchanged.
    When ``site_id`` is omitted, the id cached by :meth:`get_site_id` is used.

    Shared optional filters (a falsy value means "do not send the filter"):

    * ``date_range`` -- ``"YYYYMMDD-YYYYMMDD"``, e.g. ``"20211001-20211007"``;
      it is split on ``-`` into ``start_date`` and ``end_date``.
    * ``source`` -- ``through`` (direct visits), ``search,0`` (all search
      engines) or ``link`` (external links).
    * ``clientDevice`` -- ``pc`` (computers) or ``mobile`` (mobile devices).
    * ``visitor`` -- ``new`` (new visitors) or ``old`` (returning visitors).
    * ``area`` -- a key of :attr:`area` (for example ``"广东"``) or a raw API
      value; keys are translated through :attr:`area`, anything else is sent
      unchanged.
    """

    # docs https://tongji.baidu.com/api/manual/
    _TOKEN_URL = 'https://openapi.baidu.com/oauth/2.0/token'
    _SITE_LIST_URL = 'https://openapi.baidu.com/rest/2.0/tongji/config/getSiteList'
    _REPORT_URL = 'https://openapi.baidu.com/rest/2.0/tongji/report/getData'
    # error_code reported by the API when the access token has expired.
    _TOKEN_EXPIRED = 111

    # Site id cached by get_site_id(); None until the first lookup succeeds.
    site_id: int = None
    # Metric key -> human-readable (Chinese) label. Not used by the class
    # itself; available to callers that want to label result columns.
    fields = {
        'pv_count': '浏览量(PV)', 'pv_ratio': '浏览量占比', 'visit_count': '来源',
        'visitor_count': '访客数(UV)', 'new_visitor_count': '新访客数',
        'new_visitor_ratio': '新访客比率', 'ip_count': 'IP 数',
        'bounce_ratio': '跳出率', 'avg_visit_time': '平均访问时长',
        'avg_visit_pages': '平均访问页数', 'trans_count': '转化次数',
        'trans_ratio': '转化率', 'visit1_count': '入口页次数',
        'outward_count': '贡献下游浏览量', 'exit_count': '退出页次数',
        'average_stay_time': '平均停留时长', 'exit_ratio': '退出率',
        'out_pv_count': '贡献浏览量',
    }
    # Region name -> value sent as the ``area`` filter.
    # NOTE(review): three keys below are the literal '*' (they look like
    # masked region names). Duplicate keys collapse, so only
    # '*' -> 'province, 35' is reachable and codes 29 and 30 have no usable
    # name. Restore the real names from the Baidu documentation.
    # NOTE(review): the values contain a space after the comma, while the
    # method docstrings in this file show "province,4" -- confirm which form
    # the API accepts.
    area = {
        '全国': 'china', '北京': 'province, 1', '上海': 'province, 2',
        '天津': 'province, 3', '广东': 'province, 4', '福建': 'province, 5',
        '海南': 'province, 8', '安徽': 'province, 9', '贵州': 'province, 10',
        '甘肃': 'province, 11', '广西': 'province, 12', '河北': 'province, 13',
        '河南': 'province, 14', '黑龙江': 'province, 15', '湖北': 'province, 16',
        '湖南': 'province, 17', '吉林': 'province, 18', '江苏': 'province, 19',
        '江西': 'province, 20', '辽宁': 'province, 21', '内蒙古': 'province, 22',
        '宁夏': 'province, 23', '青海': 'province, 24', '山东': 'province, 25',
        '山西': 'province, 26', '陕西': 'province, 27', '四川': 'province, 28',
        '*': 'province, 29', '*': 'province, 30', '云南': 'province, 31',
        '浙江': 'province, 32', '重庆': 'province, 33', '香港': 'province, 34',
        '*': 'province, 35', '澳门': 'province, 36'
    }

    # ------------------------------------------------------------------
    # Transport and authentication
    # ------------------------------------------------------------------

    @classmethod
    def _is_token_expired(cls, resp) -> bool:
        """Return True when *resp* is a JSON object reporting an expired token."""
        try:
            payload = resp.json()
        except ValueError:
            # Not JSON (or undecodable): cannot be the API's expiry error.
            return False
        return (isinstance(payload, dict)
                and payload.get('error_code') == cls._TOKEN_EXPIRED)

    async def get(self, url, *args, **kwargs) -> Response:
        """Send a GET request, refreshing the access token once if it expired.

        If the first response reports the token-expired error code,
        :meth:`refresh_token` is called and the request is sent once more.
        When ``params`` is a dict carrying an ``access_token`` entry, the
        retry uses the refreshed token instead of the stale one.

        :param url: request URL
        :return: the response of the first request, or of the retry
        """
        resp = await super().get(url, *args, **kwargs)
        if self._is_token_expired(resp):
            await self.refresh_token()
            params = kwargs.get('params')
            if isinstance(params, dict) and 'access_token' in params:
                # Copy so the caller's dict is not mutated.
                kwargs['params'] = {
                    **params, 'access_token': BaiduConfig.ACCESS_TOKEN,
                }
            resp = await super().get(url, *args, **kwargs)
        return resp

    async def refresh_token(self):
        """Exchange the refresh token for a new access/refresh token pair.

        When the response contains both tokens they are stored on
        ``BaiduConfig`` and written back into this source file so they
        survive a restart. If either token is missing nothing is changed.

        :raises OSError: if this source file cannot be read or written
        """
        params = {
            'grant_type': 'refresh_token', 'client_id': BaiduConfig.CLIENT_ID,
            'client_secret': BaiduConfig.CLIENT_SECRET,
            'refresh_token': BaiduConfig.REFRESH_TOKEN,
        }
        # Use the base-class GET so the expiry handling in get() is not
        # re-entered from inside a refresh.
        resp = (await super().get(self._TOKEN_URL, params=params)).json()
        access_token = resp.get('access_token')
        refresh_token = resp.get('refresh_token')
        if access_token and refresh_token:
            # Update the in-memory values first so they are current even if
            # persisting to disk fails.
            BaiduConfig.ACCESS_TOKEN = access_token
            BaiduConfig.REFRESH_TOKEN = refresh_token
            self._persist_tokens(access_token, refresh_token)

    @staticmethod
    def _persist_tokens(access_token, refresh_token):
        """Rewrite the token assignments in this source file.

        The first ``ACCESS_TOKEN = '...'`` and ``REFRESH_TOKEN = '...'``
        assignments (optionally annotated) are replaced. Matching on the
        assignment rather than on the old value keeps the two settings
        independent even when they currently hold the same text.
        """
        import re

        with open(__file__, 'r', encoding='utf-8') as f:
            original = f.read()
        updated = original
        for name, value in (('ACCESS_TOKEN', access_token),
                            ('REFRESH_TOKEN', refresh_token)):
            pattern = re.compile(
                r"^(\s*" + name + r"\s*(?::[^=\n]*)?=\s*)(['\"]).*?\2",
                re.MULTILINE,
            )
            # A callable replacement avoids backslash interpretation of the
            # token text; repr() yields a correctly quoted literal.
            updated = pattern.sub(
                lambda m, v=value: m.group(1) + repr(v), updated, count=1)
        # The new content is fully built before the file is opened for
        # writing, so a failure above cannot leave the file truncated.
        if updated != original:
            with open(__file__, 'w', encoding='utf-8') as f:
                f.write(updated)

    # ------------------------------------------------------------------
    # Site lookup
    # ------------------------------------------------------------------

    async def get_site_list(self):
        """Fetch the site list of the account and return the decoded JSON."""
        params = {'access_token': BaiduConfig.ACCESS_TOKEN}
        resp = await self.get(self._SITE_LIST_URL, params=params)
        return resp.json()

    async def get_site_id(self, domain=None):
        """Return the site id, looking it up on first use and caching it.

        The lookup only runs while ``self.site_id`` is ``None``; once a value
        is cached it is returned regardless of *domain*.

        :param domain: domain to select; when omitted the first site is used
        :return: the cached site id, or ``None`` if no site matched
        """
        if self.site_id is None:
            site_resp = await self.get_site_list()
            for site in site_resp.get('list', []):
                if domain is None or site['domain'] == domain:
                    self.site_id = site['site_id']
                    break
        return self.site_id

    # ------------------------------------------------------------------
    # Shared report plumbing
    # ------------------------------------------------------------------

    def _resolve_area(self, area):
        """Translate an :attr:`area` key to its API value; pass others through."""
        return self.area.get(area, area) if area else None

    async def _get_report(self, method, metrics, site_id=None,
                          date_range=None, **filters):
        """Query ``report/getData`` and return the decoded JSON body.

        :param method: report name sent as the ``method`` parameter
        :param metrics: a metric string, or an iterable of metric names that
            is joined with commas
        :param site_id: site id; looked up via :meth:`get_site_id` if ``None``
        :param date_range: ``"YYYYMMDD-YYYYMMDD"`` or a falsy value
        :param filters: extra query parameters; falsy values are omitted
        :raises ValueError: if *date_range* does not split into two parts
        """
        if site_id is None:
            site_id = await self.get_site_id()
        if not isinstance(metrics, str):
            metrics = ','.join(metrics)
        params = {
            'site_id': site_id, 'method': method,
            'access_token': BaiduConfig.ACCESS_TOKEN,
            'metrics': metrics,
        }
        params.update(
            {name: value for name, value in filters.items() if value})
        if date_range:
            params['start_date'], params['end_date'] = date_range.split('-')
        resp = await self.get(self._REPORT_URL, params=params)
        return resp.json()

    # ------------------------------------------------------------------
    # Overview reports
    # ------------------------------------------------------------------

    async def get_time_trend_report(self, site_id=None, date_range=None):
        """Fetch the site trend overview (``overview/getTimeTrendRpt``).

        Requests page views, visitors, IP count, bounce ratio, average visit
        time and conversion count.

        :param site_id: site id
        :param date_range: date range, e.g. "20211001-20211007"
        :return: decoded JSON response
        """
        metric = [
            'pv_count', 'visitor_count', 'ip_count', 'bounce_ratio',
            'avg_visit_time', 'trans_count'
        ]
        return await self._get_report(
            'overview/getTimeTrendRpt', metric, site_id, date_range)

    async def get_district_report(self, site_id=None, date_range=None):
        """Fetch the visitor district overview (``overview/getDistrictRpt``).

        Only the ``pv_count`` metric is requested.

        :param site_id: site id
        :param date_range: date range, e.g. "20211001-20211007"
        :return: decoded JSON response
        """
        return await self._get_report(
            'overview/getDistrictRpt', 'pv_count', site_id, date_range)

    async def get_common_track_report(self, site_id=None, date_range=None):
        """Fetch the visitor source overview (``overview/getCommonTrackRpt``).

        Only the ``pv_count`` metric is requested.

        :param site_id: site id
        :param date_range: date range, e.g. "20211001-20211007"
        :return: decoded JSON response
        """
        return await self._get_report(
            'overview/getCommonTrackRpt', 'pv_count', site_id, date_range)

    # ------------------------------------------------------------------
    # Trend reports
    # ------------------------------------------------------------------

    async def get_trend_analyse(self, site_id=None, date_range=None, gran=None,
                                source=None, clientDevice=None, area=None,
                                visitor=None):
        """Fetch the trend analysis report (``trend/time/a``).

        :param site_id: site id
        :param date_range: date range, e.g. "20211001-20211007"
        :param gran: time granularity: day / hour / week / month
        :param source: source filter (see class docstring)
        :param clientDevice: device filter (see class docstring)
        :param area: region filter (see class docstring)
        :param visitor: visitor filter (see class docstring)
        :return: decoded JSON response
        """
        metric = [
            'pv_count', 'pv_ratio', 'visit_count', 'visitor_count',
            'new_visitor_count', 'new_visitor_ratio', 'ip_count',
            'avg_visit_time', 'avg_visit_pages', 'trans_count', 'trans_ratio',
            'avg_trans_cost', 'income'
        ]
        return await self._get_report(
            'trend/time/a', metric, site_id, date_range,
            area=self._resolve_area(area), source=source,
            clientDevice=clientDevice, visitor=visitor, gran=gran)

    async def get_latest_visit(self, site_id=None, date_range=None,
                               source=None, clientDevice=None,
                               visitor=None, area=None):
        """Fetch the real-time visitor report (``trend/latest/a``).

        :param site_id: site id
        :param date_range: date range, e.g. "20211001-20211007"
        :param source: source filter (see class docstring)
        :param clientDevice: device filter (see class docstring)
        :param visitor: visitor filter (see class docstring)
        :param area: region filter (see class docstring)
        :return: decoded JSON response
        """
        metric = ['start_time', 'area', 'source', 'access_page', 'keyword',
                  'searchword', 'is_ad', 'visitorId', 'ip', 'visit_time',
                  'visit_pages']
        return await self._get_report(
            'trend/latest/a', metric, site_id, date_range,
            source=source, clientDevice=clientDevice, visitor=visitor,
            area=self._resolve_area(area))

    # ------------------------------------------------------------------
    # Source reports
    # ------------------------------------------------------------------

    async def get_source_all(self, site_id=None, date_range=None,
                             viewType=None, clientDevice=None, visitor=None):
        """Fetch the all-sources report (``source/all/a``).

        :param site_id: site id
        :param date_range: date range, e.g. "20211001-20211007"
        :param viewType: grouping: ``type`` (by source type) or ``site``
            (by source site)
        :param clientDevice: device filter (see class docstring)
        :param visitor: visitor filter (see class docstring)
        :return: decoded JSON response
        """
        metric = [
            'pv_count', 'pv_ratio', 'visit_count', 'visitor_count',
            'new_visitor_count', 'new_visitor_ratio', 'ip_count',
            'bounce_ratio', 'avg_visit_time', 'avg_visit_pages',
            'trans_count', 'trans_ratio'
        ]
        return await self._get_report(
            'source/all/a', metric, site_id, date_range,
            clientDevice=clientDevice, visitor=visitor, viewType=viewType)

    async def get_source_engine(self, site_id=None, date_range=None,
                                clientDevice=None, area=None, visitor=None):
        """Fetch the search-engine source report (``source/engine/a``).

        :param site_id: site id
        :param date_range: date range, e.g. "20211001-20211007"
        :param clientDevice: device filter (see class docstring)
        :param area: region filter (see class docstring)
        :param visitor: visitor filter (see class docstring)
        :return: decoded JSON response
        """
        metric = [
            'pv_count', 'pv_ratio', 'visit_count', 'visitor_count',
            'new_visitor_count', 'new_visitor_ratio', 'ip_count',
            'bounce_ratio', 'avg_visit_time', 'avg_visit_pages',
            'trans_count', 'trans_ratio'
        ]
        return await self._get_report(
            'source/engine/a', metric, site_id, date_range,
            clientDevice=clientDevice, area=self._resolve_area(area),
            visitor=visitor)

    async def get_source_keyword(self, site_id=None, date_range=None,
                                 source=None, clientDevice=None, visitor=None):
        """Fetch the search-word source report (``source/searchword/a``).

        :param site_id: site id
        :param date_range: date range, e.g. "20211001-20211007"
        :param source: source filter (see class docstring)
        :param clientDevice: device filter (see class docstring)
        :param visitor: visitor filter (see class docstring)
        :return: decoded JSON response
        """
        metric = [
            'pv_count', 'pv_ratio', 'visit_count', 'visitor_count',
            'new_visitor_count', 'new_visitor_ratio', 'ip_count',
            'bounce_ratio', 'avg_visit_time', 'avg_visit_pages',
            'trans_count', 'trans_ratio'
        ]
        return await self._get_report(
            'source/searchword/a', metric, site_id, date_range,
            source=source, clientDevice=clientDevice, visitor=visitor)

    async def get_source_link(self, site_id=None, date_range=None,
                              viewType=None, domainType=None,
                              clientDevice=None, visitor=None):
        """Fetch the external-link source report (``source/link/a``).

        :param site_id: site id
        :param date_range: date range, e.g. "20211001-20211007"
        :param viewType: grouping: ``domain`` (by domain) or ``url`` (by URL)
        :param domainType: domain filter: 1 (social media), 2 (navigation
            sites) or 4 (e-mail)
        :param clientDevice: device filter (see class docstring)
        :param visitor: visitor filter (see class docstring)
        :return: decoded JSON response
        """
        metric = [
            'pv_count', 'pv_ratio', 'visit_count', 'visitor_count',
            'new_visitor_count', 'new_visitor_ratio', 'ip_count',
            'bounce_ratio', 'avg_visit_time', 'avg_visit_pages',
            'trans_count', 'trans_ratio'
        ]
        return await self._get_report(
            'source/link/a', metric, site_id, date_range,
            viewType=viewType, domainType=domainType,
            clientDevice=clientDevice, visitor=visitor)

    async def get_custom_media(self, site_id=None, date_range=None, flag=None):
        """Fetch the custom ad-tracking report (``custom/media/a``).

        :param site_id: site id
        :param date_range: date range, e.g. "20211001-20211007"
        :param flag: dimension to report on: ``from`` (source), ``plan``,
            ``unit``, ``word`` (keyword) or ``idea`` (creative)
        :return: decoded JSON response
        """
        metric = [
            'pv_count', 'pv_ratio', 'visit_count', 'visitor_count',
            'new_visitor_count', 'new_visitor_ratio', 'ip_count',
            'bounce_ratio', 'avg_visit_time', 'avg_visit_pages',
            'trans_count', 'trans_ratio'
        ]
        return await self._get_report(
            'custom/media/a', metric, site_id, date_range, flag=flag)

    # ------------------------------------------------------------------
    # Visit reports
    # ------------------------------------------------------------------

    async def get_visit_top_page(self, site_id=None, date_range=None,
                                 source=None, visitor=None):
        """Fetch the visited-pages report (``visit/toppage/a``).

        :param site_id: site id
        :param date_range: date range, e.g. "20211001-20211007"
        :param source: source filter (see class docstring)
        :param visitor: visitor filter (see class docstring)
        :return: decoded JSON response
        """
        metric = [
            'pv_count', 'visitor_count', 'ip_count', 'visit1_count',
            'outward_count', 'exit_count', 'average_stay_time', 'exit_ratio'
        ]
        return await self._get_report(
            'visit/toppage/a', metric, site_id, date_range,
            source=source, visitor=visitor)

    async def get_visit_landing_page(self, site_id=None, date_range=None):
        """Fetch the landing-pages report (``visit/landingpage/a``).

        :param site_id: site id
        :param date_range: date range, e.g. "20211001-20211007"
        :return: decoded JSON response
        """
        metric = [
            'visit_count', 'visitor_count', 'new_visitor_count',
            'new_visitor_ratio', 'ip_count', 'out_pv_count',
            'bounce_ratio', 'avg_visit_time', 'avg_visit_pages',
            'trans_count', 'trans_ratio'
        ]
        return await self._get_report(
            'visit/landingpage/a', metric, site_id, date_range)

    async def get_visit_top_domain(self, site_id=None, date_range=None,
                                   source=None, visitor=None):
        """Fetch the visited-domains report (``visit/topdomain/a``).

        :param site_id: site id
        :param date_range: date range, e.g. "20211001-20211007"
        :param source: source filter (see class docstring)
        :param visitor: visitor filter (see class docstring)
        :return: decoded JSON response
        """
        metric = [
            'pv_count', 'pv_ratio', 'visit_count', 'visitor_count',
            'new_visitor_count', 'new_visitor_ratio', 'ip_count',
            'average_stay_time', 'avg_visit_pages'
        ]
        return await self._get_report(
            'visit/topdomain/a', metric, site_id, date_range,
            source=source, visitor=visitor)

    async def get_visit_district(self, site_id=None, date_range=None,
                                 source=None, visitor=None):
        """Fetch the geographic distribution by province (``visit/district/a``).

        :param site_id: site id
        :param date_range: date range, e.g. "20211001-20211007"
        :param source: source filter (see class docstring)
        :param visitor: visitor filter (see class docstring)
        :return: decoded JSON response
        """
        metric = [
            'pv_count', 'pv_ratio', 'visit_count', 'visitor_count',
            'new_visitor_count', 'new_visitor_ratio', 'ip_count',
            'bounce_ratio', 'average_stay_time', 'avg_visit_pages',
            'trans_count', 'trans_ratio'
        ]
        return await self._get_report(
            'visit/district/a', metric, site_id, date_range,
            source=source, visitor=visitor)

    async def get_visit_world(self, site_id=None, date_range=None,
                              source=None, visitor=None):
        """Fetch the geographic distribution by country (``visit/world/a``).

        :param site_id: site id
        :param date_range: date range, e.g. "20211001-20211007"
        :param source: source filter (see class docstring)
        :param visitor: visitor filter (see class docstring)
        :return: decoded JSON response
        """
        metric = [
            'pv_count', 'pv_ratio', 'visit_count', 'visitor_count',
            'new_visitor_count', 'new_visitor_ratio', 'ip_count',
            'bounce_ratio', 'average_stay_time', 'avg_visit_pages',
            'trans_count', 'trans_ratio'
        ]
        return await self._get_report(
            'visit/world/a', metric, site_id, date_range,
            visitor=visitor, source=source)


async def main():
    """Demo: fetch the time-trend report for the default site and print it."""
    async with BaiduStat() as stat:
        report = await stat.get_time_trend_report()
        print(report)


# Script entry point: run the demo coroutine when this file is executed
# directly rather than imported.
if __name__ == '__main__':
    # asyncio is imported here because only the script path needs it.
    import asyncio

    asyncio.run(main())

上一篇:Hadoop完全分布式配置


下一篇:Hbase安装学习