由于太久没有动笔,所以决定瞎发点东西。最近在接入百度统计 API,虽然没有对响应结果做进一步处理,但还是希望能够减少大家的工作量。
百度商业账号接口未对接,百度普通账号所有接口已接入。本实现基于第三方库 httpx,所有方法都是异步调用的。
如果想要快速改成同步调用,只需替换如下几个关键字
- "AsyncClient" -> "Client"
- "async " -> ""
- "await " -> ""
from httpx import AsyncClient, Response
class BaiduConfig:
    """Credentials used by ``BaiduStat`` for the Baidu Tongji open API.

    All four values are placeholders that must be replaced with real ones.
    ``ACCESS_TOKEN`` and ``REFRESH_TOKEN`` are overwritten at runtime by
    ``BaiduStat.refresh_token`` when the access token expires.

    docs: https://tongji.baidu.com/api/manual/Chapter2/openapi.html
    """

    CLIENT_ID = '自行申请'  # placeholder: apply for your own
    CLIENT_SECRET = '自行申请'  # placeholder: apply for your own
    REFRESH_TOKEN = '见百度授权文档'  # placeholder: see Baidu's authorization docs
    ACCESS_TOKEN = '见百度授权文档'  # placeholder: see Baidu's authorization docs
class BaiduStat(AsyncClient):
    """Async client for the Baidu Tongji (Baidu Analytics) open API.

    Subclasses ``httpx.AsyncClient``. Every report method sends one GET
    request to the ``report/getData`` endpoint and returns the decoded JSON
    body without any further processing. When ``site_id`` is omitted, the
    id is resolved (and cached on the instance) via ``get_site_id``.

    Arguments shared by the report methods:

    * ``site_id``     -- site id; resolved via ``get_site_id()`` when None.
    * ``date_range``  -- ``"YYYYMMDD-YYYYMMDD"``, e.g. ``"20211001-20211007"``;
      omitted from the request when falsy.
    * ``source``      -- ``through`` (direct visit), ``search,0`` (all search
      engines) or ``link`` (external links).
    * ``clientDevice``-- ``pc`` or ``mobile``.
    * ``visitor``     -- ``new`` or ``old``.
    * ``area``        -- a key of ``BaiduStat.area`` (e.g. ``"广东"``) or a raw
      API value, which is passed through unchanged.

    Any filter whose value is falsy is left out of the request.

    docs: https://tongji.baidu.com/api/manual/
    """

    _TOKEN_URL = 'https://openapi.baidu.com/oauth/2.0/token'
    _SITE_LIST_URL = (
        'https://openapi.baidu.com/rest/2.0/tongji/config/getSiteList'
    )
    _REPORT_URL = 'https://openapi.baidu.com/rest/2.0/tongji/report/getData'
    # ``error_code`` value in a response body meaning "Access token expired".
    _TOKEN_EXPIRED_CODE = 111

    # Metric set shared by the source/* and custom/media reports.
    _SOURCE_METRICS = (
        'pv_count', 'pv_ratio', 'visit_count', 'visitor_count',
        'new_visitor_count', 'new_visitor_ratio', 'ip_count',
        'bounce_ratio', 'avg_visit_time', 'avg_visit_pages',
        'trans_count', 'trans_ratio',
    )
    # Metric set shared by the visit/district and visit/world reports.
    _REGION_METRICS = (
        'pv_count', 'pv_ratio', 'visit_count', 'visitor_count',
        'new_visitor_count', 'new_visitor_ratio', 'ip_count',
        'bounce_ratio', 'average_stay_time', 'avg_visit_pages',
        'trans_count', 'trans_ratio',
    )

    # Cached site id; filled lazily by get_site_id().
    site_id: 'int | None' = None

    # Metric key -> Chinese display label.
    # NOTE(review): 'visit_count' is labelled '来源' ("source"), which looks
    # like a mislabel -- confirm against the Baidu docs before relying on it.
    fields = {
        'pv_count': '浏览量(PV)', 'pv_ratio': '浏览量占比', 'visit_count': '来源',
        'visitor_count': '访客数(UV)', 'new_visitor_count': '新访客数',
        'new_visitor_ratio': '新访客比率', 'ip_count': 'IP 数',
        'bounce_ratio': '跳出率', 'avg_visit_time': '平均访问时长',
        'avg_visit_pages': '平均访问页数', 'trans_count': '转化次数',
        'trans_ratio': '转化率', 'visit1_count': '入口页次数',
        'outward_count': '贡献下游浏览量', 'exit_count': '退出页次数',
        'average_stay_time': '平均停留时长', 'exit_ratio': '退出率',
        'out_pv_count': '贡献浏览量',
    }

    # Region name -> value sent as the ``area`` request parameter.
    # NOTE(review): the keys for codes 29, 30 and 35 were masked as '*' in
    # the original (three duplicate keys, so only code 35 was reachable).
    # They are reconstructed here by elimination and pinyin ordering --
    # confirm against the Baidu docs.
    # NOTE(review): values contain a space after the comma ("province, 4")
    # while the method docs quote "province,4" -- confirm which form the
    # API expects.
    area = {
        '全国': 'china', '北京': 'province, 1', '上海': 'province, 2',
        '天津': 'province, 3', '广东': 'province, 4', '福建': 'province, 5',
        '海南': 'province, 8', '安徽': 'province, 9', '贵州': 'province, 10',
        '甘肃': 'province, 11', '广西': 'province, 12', '河北': 'province, 13',
        '河南': 'province, 14', '黑龙江': 'province, 15', '湖北': 'province, 16',
        '湖南': 'province, 17', '吉林': 'province, 18', '江苏': 'province, 19',
        '江西': 'province, 20', '辽宁': 'province, 21', '内蒙古': 'province, 22',
        '宁夏': 'province, 23', '青海': 'province, 24', '山东': 'province, 25',
        '山西': 'province, 26', '陕西': 'province, 27', '四川': 'province, 28',
        '西藏': 'province, 29', '新疆': 'province, 30', '云南': 'province, 31',
        '浙江': 'province, 32', '重庆': 'province, 33', '香港': 'province, 34',
        '台湾': 'province, 35', '澳门': 'province, 36',
    }

    # ------------------------------------------------------------------
    # Transport / authentication
    # ------------------------------------------------------------------
    async def get(self, url, *args, **kwargs) -> Response:
        """Send a GET request, refreshing the access token once if expired.

        If the JSON body carries ``error_code == 111`` the token is
        refreshed and the request is sent a second time.

        :param url: request URL; remaining arguments go to ``AsyncClient.get``
        :return: the response of the first request, or of the retry
        """
        resp = await super().get(url, *args, **kwargs)
        if not self._is_token_expired(resp):
            return resp

        await self.refresh_token()
        # The caller built ``params`` with the expired token; swap in the
        # fresh one, otherwise the retry would fail the same way. A copy is
        # used so the caller's dict is not mutated.
        params = kwargs.get('params')
        if isinstance(params, dict) and 'access_token' in params:
            kwargs['params'] = dict(
                params, access_token=BaiduConfig.ACCESS_TOKEN
            )
        return await super().get(url, *args, **kwargs)

    @classmethod
    def _is_token_expired(cls, resp) -> bool:
        """Return True when the response body reports an expired token."""
        try:
            body = resp.json()
        except ValueError:  # body is not JSON; nothing to inspect
            return False
        return (
            isinstance(body, dict)
            and body.get('error_code') == cls._TOKEN_EXPIRED_CODE
        )

    async def refresh_token(self):
        """Exchange the refresh token for a new access/refresh token pair.

        On success the new tokens are stored on ``BaiduConfig`` and also
        written back into this source file by replacing the old token
        strings, so they survive a restart. Nothing happens when the
        response lacks either token.
        """
        params = {
            'grant_type': 'refresh_token', 'client_id': BaiduConfig.CLIENT_ID,
            'client_secret': BaiduConfig.CLIENT_SECRET,
            'refresh_token': BaiduConfig.REFRESH_TOKEN,
        }
        # Use the parent's get(): going through the overridden one could
        # re-enter refresh_token() if this endpoint ever answered with 111.
        resp = await super().get(self._TOKEN_URL, params=params)
        data = resp.json()
        access_token = data.get('access_token')
        refresh_token = data.get('refresh_token')
        if not (access_token and refresh_token):
            return

        old_access = BaiduConfig.ACCESS_TOKEN
        old_refresh = BaiduConfig.REFRESH_TOKEN
        # Update the in-memory tokens first so they are usable even if
        # writing the file below fails.
        BaiduConfig.ACCESS_TOKEN = access_token
        BaiduConfig.REFRESH_TOKEN = refresh_token
        # Identical old values cannot be told apart by text replacement
        # (both would become the access token), so skip the file rewrite.
        if old_access != old_refresh:
            self._persist_tokens(
                [(old_access, access_token), (old_refresh, refresh_token)]
            )

    @staticmethod
    def _persist_tokens(replacements):
        """Rewrite this source file, replacing each old token with its new one.

        :param replacements: iterable of ``(old, new)`` string pairs
        """
        # The file contains non-ASCII text, so the encoding is pinned
        # instead of relying on the platform default.
        with open(__file__, 'r+', encoding='utf-8') as f:
            content = f.read()
            for old, new in replacements:
                # str.replace('', new) would insert ``new`` between every
                # character of the file, so empty old values are skipped.
                if old:
                    content = content.replace(old, new)
            # Truncate only after the new content is fully computed.
            f.seek(0)
            f.truncate()
            f.write(content)

    # ------------------------------------------------------------------
    # Site lookup
    # ------------------------------------------------------------------
    async def get_site_list(self):
        """Fetch the site list of the account and return the JSON body."""
        params = {'access_token': BaiduConfig.ACCESS_TOKEN}
        resp = await self.get(self._SITE_LIST_URL, params=params)
        return resp.json()

    async def get_site_id(self, domain=None):
        """Return the site id, looking it up on first use.

        The result is cached on ``self.site_id``; once cached, ``domain``
        is ignored.

        :param domain: domain to match; the first listed site is used
            when omitted
        :return: the site id, or None when no site matched
        """
        if self.site_id is None:
            site_resp = await self.get_site_list()
            for site in site_resp.get('list', []):
                if domain is None or site['domain'] == domain:
                    self.site_id = site['site_id']
                    break
        return self.site_id

    # ------------------------------------------------------------------
    # Shared request plumbing
    # ------------------------------------------------------------------
    def _area_code(self, area):
        """Map a region name to its API value; unknown values pass through."""
        return self.area.get(area, area) if area else None

    async def _get_report(self, method, metrics, site_id, date_range,
                          **filters):
        """Query ``report/getData`` and return the decoded JSON body.

        :param method: API report name, e.g. ``"source/all/a"``
        :param metrics: iterable of metric names, joined with commas
        :param site_id: site id; resolved via ``get_site_id()`` when None
        :param date_range: ``"YYYYMMDD-YYYYMMDD"`` or a falsy value
        :param filters: extra request parameters; falsy values are dropped
        :raises ValueError: if ``date_range`` does not split into exactly
            two parts on ``-``
        """
        if site_id is None:
            site_id = await self.get_site_id()
        params = {
            'site_id': site_id, 'method': method,
            'access_token': BaiduConfig.ACCESS_TOKEN,
            'metrics': ','.join(metrics),
        }
        params.update(
            {name: value for name, value in filters.items() if value}
        )
        if date_range:
            params['start_date'], params['end_date'] = date_range.split('-')
        resp = await self.get(self._REPORT_URL, params=params)
        return resp.json()

    # ------------------------------------------------------------------
    # Overview reports
    # ------------------------------------------------------------------
    async def get_time_trend_report(self, site_id=None, date_range=None):
        """Site trend data (``overview/getTimeTrendRpt``): PV, UV, IP count.

        :param site_id: site id
        :param date_range: date range, e.g. "20211001-20211007"
        """
        metric = (
            'pv_count', 'visitor_count', 'ip_count', 'bounce_ratio',
            'avg_visit_time', 'trans_count',
        )
        return await self._get_report(
            'overview/getTimeTrendRpt', metric, site_id, date_range)

    async def get_district_report(self, site_id=None, date_range=None):
        """Visitor region distribution (``overview/getDistrictRpt``).

        :param site_id: site id
        :param date_range: date range, e.g. "20211001-20211007"
        """
        return await self._get_report(
            'overview/getDistrictRpt', ('pv_count',), site_id, date_range)

    async def get_common_track_report(self, site_id=None, date_range=None):
        """Visitor sources (``overview/getCommonTrackRpt``).

        :param site_id: site id
        :param date_range: date range, e.g. "20211001-20211007"
        """
        return await self._get_report(
            'overview/getCommonTrackRpt', ('pv_count',), site_id, date_range)

    # ------------------------------------------------------------------
    # Trend reports
    # ------------------------------------------------------------------
    async def get_trend_analyse(self, site_id=None, date_range=None, gran=None,
                                source=None, clientDevice=None, area=None,
                                visitor=None):
        """Trend analysis (``trend/time/a``).

        :param site_id: site id
        :param date_range: date range, e.g. "20211001-20211007"
        :param gran: time granularity: day / hour / week / month
        :param source: source filter: through / search,0 / link
        :param clientDevice: device filter: pc / mobile
        :param area: region filter; "广东" and "province,4" are documented
            as equivalent
        :param visitor: visitor filter: new / old
        """
        metric = (
            'pv_count', 'pv_ratio', 'visit_count', 'visitor_count',
            'new_visitor_count', 'new_visitor_ratio', 'ip_count',
            'avg_visit_time', 'avg_visit_pages', 'trans_count', 'trans_ratio',
            'avg_trans_cost', 'income',
        )
        return await self._get_report(
            'trend/time/a', metric, site_id, date_range,
            area=self._area_code(area), source=source,
            clientDevice=clientDevice, visitor=visitor, gran=gran)

    async def get_latest_visit(self, site_id=None, date_range=None,
                               source=None, clientDevice=None,
                               visitor=None, area=None):
        """Real-time visitors (``trend/latest/a``).

        :param site_id: site id
        :param date_range: date range, e.g. "20211001-20211007"
        :param source: source filter: through / search,0 / link
        :param clientDevice: device filter: pc / mobile
        :param visitor: visitor filter: new / old
        :param area: region filter; "广东" and "province,4" are documented
            as equivalent
        """
        metric = (
            'start_time', 'area', 'source', 'access_page', 'keyword',
            'searchword', 'is_ad', 'visitorId', 'ip', 'visit_time',
            'visit_pages',
        )
        return await self._get_report(
            'trend/latest/a', metric, site_id, date_range,
            source=source, clientDevice=clientDevice, visitor=visitor,
            area=self._area_code(area))

    # ------------------------------------------------------------------
    # Source reports
    # ------------------------------------------------------------------
    async def get_source_all(self, site_id=None, date_range=None,
                             viewType=None, clientDevice=None, visitor=None):
        """All sources (``source/all/a``).

        :param site_id: site id
        :param date_range: date range, e.g. "20211001-20211007"
        :param viewType: grouping: type (by source type) / site (by
            source site)
        :param clientDevice: device filter: pc / mobile
        :param visitor: visitor filter: new / old
        """
        return await self._get_report(
            'source/all/a', self._SOURCE_METRICS, site_id, date_range,
            clientDevice=clientDevice, visitor=visitor, viewType=viewType)

    async def get_source_engine(self, site_id=None, date_range=None,
                                clientDevice=None, area=None, visitor=None):
        """Search-engine sources (``source/engine/a``).

        :param site_id: site id
        :param date_range: date range, e.g. "20211001-20211007"
        :param clientDevice: device filter: pc / mobile
        :param area: region filter; "广东" and "province,4" are documented
            as equivalent
        :param visitor: visitor filter: new / old
        """
        return await self._get_report(
            'source/engine/a', self._SOURCE_METRICS, site_id, date_range,
            clientDevice=clientDevice, area=self._area_code(area),
            visitor=visitor)

    async def get_source_keyword(self, site_id=None, date_range=None,
                                 source=None, clientDevice=None, visitor=None):
        """Search-word sources (``source/searchword/a``).

        :param site_id: site id
        :param date_range: date range, e.g. "20211001-20211007"
        :param source: source filter: through / search,0 / link
        :param clientDevice: device filter: pc / mobile
        :param visitor: visitor filter: new / old
        """
        return await self._get_report(
            'source/searchword/a', self._SOURCE_METRICS, site_id, date_range,
            source=source, clientDevice=clientDevice, visitor=visitor)

    async def get_source_link(self, site_id=None, date_range=None,
                              viewType=None, domainType=None,
                              clientDevice=None, visitor=None):
        """External-link sources (``source/link/a``).

        :param site_id: site id
        :param date_range: date range, e.g. "20211001-20211007"
        :param viewType: grouping: domain (by domain) / url (by URL)
        :param domainType: domain filter: 1 (social media) /
            2 (navigation sites) / 4 (e-mail)
        :param clientDevice: device filter: pc / mobile
        :param visitor: visitor filter: new / old
        """
        return await self._get_report(
            'source/link/a', self._SOURCE_METRICS, site_id, date_range,
            viewType=viewType, domainType=domainType,
            clientDevice=clientDevice, visitor=visitor)

    async def get_custom_media(self, site_id=None, date_range=None, flag=None):
        """Designated ad tracking (``custom/media/a``).

        :param site_id: site id
        :param date_range: date range, e.g. "20211001-20211007"
        :param flag: dimension to report on: from (source) / plan / unit /
            word (keyword) / idea (creative)
        """
        return await self._get_report(
            'custom/media/a', self._SOURCE_METRICS, site_id, date_range,
            flag=flag)

    # ------------------------------------------------------------------
    # Visit reports
    # ------------------------------------------------------------------
    async def get_visit_top_page(self, site_id=None, date_range=None,
                                 source=None, visitor=None):
        """Visited pages (``visit/toppage/a``).

        :param site_id: site id
        :param date_range: date range, e.g. "20211001-20211007"
        :param source: source filter: through / search,0 / link
        :param visitor: visitor filter: new / old
        """
        metric = (
            'pv_count', 'visitor_count', 'ip_count', 'visit1_count',
            'outward_count', 'exit_count', 'average_stay_time', 'exit_ratio',
        )
        return await self._get_report(
            'visit/toppage/a', metric, site_id, date_range,
            source=source, visitor=visitor)

    async def get_visit_landing_page(self, site_id=None, date_range=None):
        """Landing (entry) pages (``visit/landingpage/a``).

        :param site_id: site id
        :param date_range: date range, e.g. "20211001-20211007"
        """
        metric = (
            'visit_count', 'visitor_count', 'new_visitor_count',
            'new_visitor_ratio', 'ip_count', 'out_pv_count',
            'bounce_ratio', 'avg_visit_time', 'avg_visit_pages',
            'trans_count', 'trans_ratio',
        )
        return await self._get_report(
            'visit/landingpage/a', metric, site_id, date_range)

    async def get_visit_top_domain(self, site_id=None, date_range=None,
                                   source=None, visitor=None):
        """Visited domains (``visit/topdomain/a``).

        :param site_id: site id
        :param date_range: date range, e.g. "20211001-20211007"
        :param source: source filter: through / search,0 / link
        :param visitor: visitor filter: new / old
        """
        metric = (
            'pv_count', 'pv_ratio', 'visit_count', 'visitor_count',
            'new_visitor_count', 'new_visitor_ratio', 'ip_count',
            'average_stay_time', 'avg_visit_pages',
        )
        return await self._get_report(
            'visit/topdomain/a', metric, site_id, date_range,
            source=source, visitor=visitor)

    async def get_visit_district(self, site_id=None, date_range=None,
                                 source=None, visitor=None):
        """Region distribution by province (``visit/district/a``).

        :param site_id: site id
        :param date_range: date range, e.g. "20211001-20211007"
        :param source: source filter: through / search,0 / link
        :param visitor: visitor filter: new / old
        """
        return await self._get_report(
            'visit/district/a', self._REGION_METRICS, site_id, date_range,
            source=source, visitor=visitor)

    async def get_visit_world(self, site_id=None, date_range=None,
                              source=None, visitor=None):
        """Region distribution by country (``visit/world/a``).

        :param site_id: site id
        :param date_range: date range, e.g. "20211001-20211007"
        :param source: source filter: through / search,0 / link
        :param visitor: visitor filter: new / old
        """
        return await self._get_report(
            'visit/world/a', self._REGION_METRICS, site_id, date_range,
            visitor=visitor, source=source)
async def main():
    """Demo entry point: fetch the time-trend report and print it.

    No site id is passed, so ``BaiduStat`` resolves one itself.
    """
    async with BaiduStat() as client:
        r = await client.get_time_trend_report()
        print(r)
# Run the demo only when the file is executed as a script.
if __name__ == '__main__':
    import asyncio

    asyncio.run(main())