import pandas as pd
import numpy as np

# Build a Series from a plain list; the np.nan entry marks a missing value
# and the values are stored as float64 (see the printed dtype).
s = pd.Series([1, 2, 3, 4, np.nan, 6, 8])
s
0    1.0
1    2.0
2    3.0
3    4.0
4    NaN
5    6.0
6    8.0
dtype: float64
# Six consecutive calendar days starting at 2021-07-12.
dates = pd.date_range('20210712', periods=6)
dates
DatetimeIndex(['2021-07-12', '2021-07-13', '2021-07-14', '2021-07-15', '2021-07-16', '2021-07-17'], dtype='datetime64[ns]', freq='D')
# 6x4 frame of random samples from np.random.randn, indexed by `dates`,
# with one column per letter of 'ABCD'.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
df
A B C D
2021-07-12 0.547980 0.504086 -0.341964 0.517595
2021-07-13 -1.359114 0.859277 -0.174702 -1.156648
2021-07-14 -0.695423 -0.442083 0.135932 0.295130
2021-07-15 0.590984 0.292082 0.780524 0.036832
2021-07-16 0.320222 0.182605 0.988981 0.864780
2021-07-17 -0.193702 0.645405 0.704703 0.680967
df.head(n=3)  # Show the first 3 rows.
df.tail(n=5)  # Show the last 5 rows.
df.index  # Row labels of the frame.
DatetimeIndex(['2021-07-12', '2021-07-13', '2021-07-14', '2021-07-15', '2021-07-16', '2021-07-17'], dtype='datetime64[ns]', freq='D')
df.columns  # Column labels of the frame.
Index(['A', 'B', 'C', 'D'], dtype='object')
df.describe()  # Quick statistical overview of the data.
df.sort_values(by='B')  # Sort rows by one column; the printed table below is misaligned.
A B C D
2021-07-14 -0.695423 -0.442083 0.135932 0.295130
2021-07-16 0.320222 0.182605 0.988981 0.864780
2021-07-15 0.590984 0.292082 0.780524 0.036832
2021-07-12 0.547980 0.504086 -0.341964 0.517595
2021-07-17 -0.193702 0.645405 0.704703 0.680967
2021-07-13 -1.359114 0.859277 -0.174702 -1.156648
df['A']  # Select the values of the first column (label 'A').
2021-07-12 0.547980
2021-07-13 -1.359114
2021-07-14 -0.695423
2021-07-15 0.590984
2021-07-16 0.320222
2021-07-17 -0.193702
Freq: D, Name: A, dtype: float64
df[:3]  # Slice: the first three rows.
A B C D
2021-07-12 0.547980 0.504086 -0.341964 0.517595
2021-07-13 -1.359114 0.859277 -0.174702 -1.156648
2021-07-14 -0.695423 -0.442083 0.135932 0.295130