用 pandas 的 pivot_table 或 groupby 实现 SQL 中的 COUNT(DISTINCT) 功能

2023-12-22 13:16:10

import pandas as pd

import numpy as np

# Load the exported CSV into a DataFrame (the bare file name is resolved
# relative to the current working directory).
data = pd.read_csv('活跃买家分析初稿.csv')

# Preview the first rows of the raw data.
data.head()

.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}

.dataframe tbody tr th {

    vertical-align: top;

}

.dataframe thead th {

    text-align: right;

}

	recycler_key	date 周	date 年	date 月	记录数
0	1694	周 1	2018	一月	6
1	1693	周 1	2018	一月	14
2	1686	周 1	2018	一月	20
3	1677	周 1	2018	一月	62
4	1676	周 1	2018	一月	25

# Overwrite the column labels positionally with short English names; the
# list must contain exactly one entry per existing column, in column order.
data.columns=['merchant','week','year','month','records']

# Preview again to confirm the new labels.
data.head()

.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}

.dataframe tbody tr th {

    vertical-align: top;

}

.dataframe thead th {

    text-align: right;

}

	merchant	week	year	month	records
0	1694	周 1	2018	一月	6
1	1693	周 1	2018	一月	14
2	1686	周 1	2018	一月	20
3	1677	周 1	2018	一月	62
4	1676	周 1	2018	一月	25

# Count the rows belonging to every (month, merchant) pair. The result is a
# Series indexed by (month, merchant), so each merchant appears at most once
# per month.
data1 = data.groupby(by=['month', 'merchant']).size()

data1.head()

month  merchant

一月     1           2

       240         1

       241         1

       256         9

       277         2

dtype: int64

# Move the index levels of data1 back into ordinary columns and preview the result.
data1.reset_index().head()

.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}

.dataframe tbody tr th {

    vertical-align: top;

}

.dataframe thead th {

    text-align: right;

}

# Approach 1: data1 holds one entry per (month, merchant) pair, so counting
# the entries per month yields the number of distinct merchants in that month.
# reindex() puts the months in calendar order instead of the default sort
# order of the labels; the final reset_index() turns the month index back
# into a column.
(
    data1.reset_index()
    .groupby('month')['merchant']
    .size()
    .reindex([
        '一月', '二月', '三月', '四月', '五月', '六月',
        '七月', '八月', '九月', '十月', '十一月', '十二月',
    ])
    .reset_index()
)

.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}

.dataframe tbody tr th {

    vertical-align: top;

}

.dataframe thead th {

    text-align: right;

}

# Approach 2: pivot_table with a custom aggregation function.
# Missing values are dropped before counting: Series.unique() keeps NaN as a
# value of its own, which would inflate the count by one for any month that
# has a missing merchant. SQL COUNT(DISTINCT ...) ignores NULLs, and so do the
# pd.Series.nunique-based variants used elsewhere in this file.
data2 = data.pivot_table(
    index='month',
    values='merchant',
    aggfunc=lambda x: len(x.dropna().unique()),
)

# Put the months in calendar order and turn the month index into a column.
data2.reindex([
    '一月', '二月', '三月', '四月', '五月', '六月',
    '七月', '八月', '九月', '十月', '十一月', '十二月',
]).reset_index()

.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}

.dataframe tbody tr th {

    vertical-align: top;

}

.dataframe thead th {

    text-align: right;

}

# Approach 3: pivot_table with pd.Series.nunique as the aggregation, which
# counts the distinct non-missing merchants within each month.
data3 = data.pivot_table(
    values='merchant',
    index='month',
    aggfunc=pd.Series.nunique,
)

# Put the months in calendar order and turn the month index into a column.
data3.reindex([
    '一月', '二月', '三月', '四月', '五月', '六月',
    '七月', '八月', '九月', '十月', '十一月', '十二月',
]).reset_index()

.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}

.dataframe tbody tr th {

    vertical-align: top;

}

.dataframe thead th {

    text-align: right;

}

# Approach 4: group by month and aggregate the merchant column with
# pd.Series.nunique to obtain the distinct merchant count per month.
data4 = (
    data
    .groupby(by=['month'])
    .agg({'merchant': pd.Series.nunique})
)

# Put the months in calendar order and turn the month index into a column.
data4.reindex([
    '一月', '二月', '三月', '四月', '五月', '六月',
    '七月', '八月', '九月', '十月', '十一月', '十二月',
]).reset_index()

.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}

.dataframe tbody tr th {

    vertical-align: top;

}

.dataframe thead th {

    text-align: right;

}

码农公寓