lead 计算次日留存率,一般没有问题,但是如果统计第3日,第4日及以后,会有一点点的小问题,会存在逻辑不严谨;
正确代码
with tmp_da_uid_project as (
select
ds
,to_date(ds,'yyyymmdd') as dt
,project_id
,uid
,count(distinct case when event_id = 2001 then uid end) as uv
,count( case when event_id = 2001 then uid end) as pv
,count(distinct uid ) as uv_all
,count( uid ) as pv_all
from tmp_zj_project_utrack_detail_quickbi
where event_id = 2001
group by
ds
,project_id
,uid
)
, tmp_da_uid_project_ext as (
select
*
,datediff(t1.ds_login_lead,t1.dt,'dd') as diff_login_lead_project
,datediff(nvl(t1.ds_login_lead2,t1.ds_login_lead),t1.dt,'dd') as diff_login_lead2_project
from(
select
ds
,dt
,project_id
,uid
,uv
,pv
,uv_all
,pv_all
,lag(dt,1)over(partition by project_id,uid order by dt asc ) as ds_login_lag
,lead(dt,1)over(partition by project_id,uid order by dt asc ) as ds_login_lead
,lead(dt,2)over(partition by project_id,uid order by dt asc ) as ds_login_lead2
from tmp_da_uid_project
)t1
)
select
ds
,project_id
,count(distinct uid ) as uv
,count(distinct case when diff_login_lead_project = 1 then uid end) as 次日
,count(distinct case when diff_login_lead2_project = 2 then uid end) as 第3日
from tmp_da_uid_project_ext
where project_id = '2178'
and uid = '1833905061400054'
group by
ds
,project_id
错误代码示例截图:
明细数据信息