# Demo: attach name labels to the indexes of a Series and a DataFrame.
import pandas as pd
import numpy as np

# Series of 5 random values; index=list('abcde') gives each row a label.
s = pd.Series(np.random.rand(5), index=list('abcde'))
s.index.name = 'alpha'  # name label for the row index

# 4x3 DataFrame of random values; columns=[...] gives each column a label.
df = pd.DataFrame(np.random.randn(4, 3), columns=['one', 'two', 'three'])
df.index.name = 'row'  # name label for the row index
df.columns.name = 'col'  # name label for the column index
# Demo: working with a Series whose index contains duplicate labels.
import pandas as pd
import numpy as np

# Values 0..5 under the labels a, b, c, b, d, a ('a' and 'b' appear twice).
s = pd.Series(np.arange(6), index=list('abcbda'))

# The expressions below do not store their results; evaluate them in an
# interactive session (or wrap them in print) to inspect the values.
s['a']  # every value stored under label 'a'
s.index.is_unique  # whether every index label is unique
s.index.unique()  # the distinct index labels

s.groupby(s.index).sum()  # group rows by index label and sum each group
s.groupby(s.index).mean()  # group rows by index label and average each group
s.groupby(s.index).first()  # group rows by index label, keep the first item
# Demo: build a two-level MultiIndex Series and select from it with [].
import pandas as pd
import numpy as np

# Two parallel lists, one per index level; zip(*a) pairs them into tuples
# such as ('a', 1), which from_tuples turns into a two-level index.
a = [['a', 'a', 'a', 'b', 'b', 'c', 'c'], [1, 2, 3, 1, 2, 2, 3]]
t = list(zip(*a))
index = pd.MultiIndex.from_tuples(t, names=['level1', 'level2'])
s = pd.Series(np.random.rand(7), index=index)
# Output of s (the values are random, so the numbers below are one sample run):
# level1  level2
# a       1         0.029233
#         2         0.539508
#         3         0.502217
# b       1         0.536222
#         2         0.217398
# c       2         0.551864
#         3         0.596248

s['b']  # all rows whose level1 label is 'b'
# Output:
# level2
# 1    0.536222
# 2    0.217398
# dtype: float64

s['b':'c']  # label slice on level1; both end labels are included
# Output:
# level1  level2
# b       1         0.536222
#         2         0.217398
# c       2         0.551864
#         3         0.596248
# dtype: float64

s[['a', 'c']]  # list of level1 labels
# Output:
# level1  level2
# a       1         0.029233
#         2         0.539508
#         3         0.502217
# c       2         0.551864
#         3         0.596248
# dtype: float64

s[:, 2]  # every level1 label, restricted to level2 == 2
# Output:
# level1
# a    0.539508
# b    0.217398
# c    0.551864
# dtype: float64