-
需求:方便查看每个域名每个域名对应的PV UV 独立IP 以及refer进行统计,所以nginx日志进行入库(离线入库),如果不想用设置简单的方式 可以考虑大数据的方式
简单:nginx日志-->nginx日志切割-->转移到统计服务器-->shell或者python入库-->分析结果接入结果表-->帆软出图
大数据方式:代码写拦截器-->flume日志收集-->kafka-->hdfs--hive load数据-->hive统计(可以用azkaban进行结果统计 )-->结果写入hive表-->hue? ?superset? druid查询结果(或者Sqoop同步到mysql里面去 )
-
nginx日志格式
?
log_format mains $server_name "|" $remote_addr "|" [$time_iso8601] "|" "$request"? "|"
$status? "|" $body_bytes_sent "|"? "$http_referer"? ?"|"
"$http_user_agent"? "|"? $upstream_addr "|" $request_time? ?"|" $upstream_response_time|;
3.数据库设计
数据加载表 CREATE?TABLE?`tchg`?( ??`id`?bigint(255)?NOT?NULL?AUTO_INCREMENT, ??`server_name`?varchar(60)?DEFAULT?NULL, ??`remote_addr`?varchar(50)?DEFAULT?NULL, ??`time_iso8601`?varchar(50)?DEFAULT?NULL, ??`request`?varchar(1000)?DEFAULT?NULL, ??`status`?varchar(10)?CHARACTER?SET?utf8?COLLATE?utf8_general_ci?DEFAULT?NULL, ??`body_bytes_sent`?varchar(10)?DEFAULT?NULL, ??`http_referer`?varchar(80)?DEFAULT?NULL, ??`http_user_agent`?varchar(800)?CHARACTER?SET?utf8?COLLATE?utf8_general_ci?DEFAULT?NULL, ??`upstream_addr`?varchar(50)?DEFAULT?NULL, ??`request_time`?varchar(10)?DEFAULT?NULL, ??`upstream_response_time`?varchar(10)?DEFAULT?NULL, ??PRIMARY?KEY?(`id`), ??KEY?`index_remote_addr`?(`remote_addr`), ??KEY?`index_request`?(`request`), ??KEY?`index_status`?(`request`), ??KEY?`index_request_time`?(`request_time`), ??KEY?`index_time_iso8601`?(`time_iso8601`) )?ENGINE=InnoDB?AUTO_INCREMENT=24822?DEFAULT?CHARSET=utf8; 统计结果表 CREATE?TABLE?`nginx_api`?( ??`id`?bigint(20)?NOT?NULL?AUTO_INCREMENT, ??`date`?varchar(20)?DEFAULT?NULL, ??`type`?varchar(50)?DEFAULT?NULL, ??`request`?varchar(800)?CHARACTER?SET?utf8?COLLATE?utf8_general_ci?DEFAULT?NULL, ??`count`?varchar(8)?DEFAULT?NULL, ??PRIMARY?KEY?(`id`), ??KEY?`index_date`?(`date`), ??KEY?`index_type`?(`type`) )?ENGINE=InnoDB?AUTO_INCREMENT=1004?DEFAULT?CHARSET=utf8; CREATE?TABLE?`nginx_ip`?( ??`id`?bigint(20)?NOT?NULL?AUTO_INCREMENT, ??`date`?varchar(20)?DEFAULT?NULL, ??`type`?varchar(50)?DEFAULT?NULL, ??`remote_addr`?varchar(50)?DEFAULT?NULL, ??`count`?varchar(8)?DEFAULT?NULL, ??PRIMARY?KEY?(`id`), ??KEY?`index_date`?(`date`), ??KEY?`index_type`?(`type`) )?ENGINE=InnoDB?AUTO_INCREMENT=4506?DEFAULT?CHARSET=utf8; CREATE?TABLE?`nginx_pv`?( ??`id`?bigint(20)?NOT?NULL?AUTO_INCREMENT, ??`date`?varchar(20)?DEFAULT?NULL, ??`type`?varchar(50)?DEFAULT?NULL, ??`pv`?varchar(8)?DEFAULT?NULL, ??PRIMARY?KEY?(`id`), ??KEY?`index_date`?(`date`), ??KEY?`index_type`?(`type`) )?ENGINE=InnoDB?AUTO_INCREMENT=337?DEFAULT?CHARSET=utf8;
3.数据同步(数据入库 是使用的kettle读取文本 写入数据库 不懂kettle的童鞋 可以学习下kettle)
5.分析数据
#!/bin/bash time=`date??-d?"1?day?ago"???+%Y-%m-%d` user="root" pwd="123456" host="127.0.0.1" type=(expertpaycallback?frontadminapi?frontbsapi?frontpartnerapi?hmg?hmzh?livecbapi?m?offlinebsapi?paycallback?tchg?tokenurl?zbcadminapi?zbc) for((?i=0;i<${#type[@]};i++))? do mysql?-u${user}?-p${pwd}?-h${host}?-e?"use?slowlog;insert?into?nginx_api(date,type,request,count)?select?date_sub(curdate(),interval?1?day)?as?‘date‘,\"${type[i]}\"?as?type,request,count(*)?as?count?from?${type[i]}??where?time_iso8601?like?\"[${time}%\"?and??request_time?>?0.3?GROUP?BY?request?;" done [root@iZbp147i94k3xwda7ph5mcZ?statics]#?cat??ip_static.sh? #!/bin/bash time=`date??-d?"1?day?ago"???+%Y-%m-%d` user="root" pwd="123456" host="127.0.0.1" type=(expertpaycallback?frontadminapi?frontbsapi?frontpartnerapi?hmg?hmzh?livecbapi?m?offlinebsapi?paycallback?tchg?tokenurl?zbcadminapi?zbc) for((?i=0;i<${#type[@]};i++))? do #select?date_sub(curdate(),interval?1?day)?as?‘date‘,"sss"?as?type,remote_addr,count(*)?as?count?from?expertpaycallback??where?time_iso8601?like?"[2021-02-24%"?GROUP?BY?remote_addr??ORDER?BY?count?desc?limit?5;" mysql?-u${user}?-p${pwd}?-h${host}?-e?"use?slowlog;insert?into?nginx_ip(date,type,remote_addr,count)?select?date_sub(curdate(),interval?1?day)?as?‘date‘,\"${type[i]}\"?as?type,remote_addr,count(*)?as?count?from?${type[i]}??where?time_iso8601?like?\"[${time}%\"?GROUP?BY?remote_addr?ORDER?BY?count?desc?limit?30;;" sleep?1 done [root@iZbp147i94k3xwda7ph5mcZ?statics]#?cat??pv_static.sh? #!/bin/bash time=`date??-d?"1?day?ago"???+%Y-%m-%d` user="root" pwd="123456" host="127.0.0.1" type=(expertpaycallback?frontadminapi?frontbsapi?frontpartnerapi?hmg?hmzh?livecbapi?m?offlinebsapi?paycallback?tchg?tokenurl?zbcadminapi?zbc) for((?i=0;i<${#type[@]};i++))? ??do? ????mysql??-u${user}???-p${pwd}???-h${host}??-e?"use?slowlog;insert??into??nginx_pv(date,type,pv)?select?date_sub(curdate(),interval?1?day)?as?‘date‘,\"${type[i]}\"??as??type,count(1)?as??pv?from??${type[i]}??where?time_iso8601?like?\"[${time}%\";" done
6.查看数据? 这个比较简单 稍微好一点? ?echarts 出图即可