import numpy as np
import pandas as pd
# Small demo frame: four labelled rows and two float columns, with NaN
# entries placed so the reductions below have missing values to handle.
df = pd.DataFrame(
    {
        'one': [1.4, 7.1, np.nan, 0.75],
        'two': [np.nan, -4.5, np.nan, -1.3],
    },
    index=list('abcd'),
)
df
|   | one  | two  |
|---|------|------|
| a | 1.40 | NaN  |
| b | 7.10 | -4.5 |
| c | NaN  | NaN  |
| d | 0.75 | -1.3 |
# By default, sum() adds up the values in each column
df.sum()
one 9.25
two -5.80
dtype: float64
# Sum across each row
df.sum(axis=1) # NaN is skipped by default (skipna=True); pass skipna=False to propagate NaN instead
a 1.40
b 2.60
c 0.00
d -0.55
dtype: float64
# Index label of the largest value in each column
df.idxmax()
one b
two d
dtype: object
# Running (cumulative) sum down each column; positions that hold NaN stay NaN
# in the result, and the running total continues past them
df.cumsum()
|   | one  | two  |
|---|------|------|
| a | 1.40 | NaN  |
| b | 8.50 | -4.5 |
| c | NaN  | NaN  |
| d | 9.25 | -5.8 |
# Summary statistics for each column
df.describe()
|       | one      | two       |
|-------|----------|-----------|
| count | 3.000000 | 2.000000  |
| mean  | 3.083333 | -2.900000 |
| std   | 3.493685 | 2.262742  |
| min   | 0.750000 | -4.500000 |
| 25%   | 1.075000 | -3.700000 |
| 50%   | 1.400000 | -2.900000 |
| 75%   | 4.250000 | -2.100000 |
| max   | 7.100000 | -1.300000 |
# Sixteen string labels: the pattern a, a, b, c repeated four times.
s1 = pd.Series(list('aabc') * 4)
s1
0 a
1 a
2 b
3 c
4 a
5 a
6 b
7 c
8 a
9 a
10 b
11 c
12 a
13 a
14 b
15 c
dtype: object
# On non-numeric data, describe() reports count, number of unique values,
# the most frequent value (top) and how often it occurs (freq)
s1.describe()
count 16
unique 3
top a
freq 8
dtype: object