# pandas入门:层次化索引

from pandas import Series
import numpy as np

# Build a Series whose index has two levels: an outer letter key and an
# inner integer key, passed as two parallel lists.
outer_keys = ['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'd', 'd']
inner_keys = [1, 2, 3, 1, 2, 3, 1, 2, 2, 3]
data = Series(np.random.randn(len(outer_keys)), index=[outer_keys, inner_keys])

# The quoted blocks below are sample output; the values come from an unseeded
# random generator, so the numbers differ from run to run.
print(data)
'''
a  1    0.050239
   2    1.886958
   3   -1.366131
b  1    1.678755
   2    0.029100
   3   -1.121555
c  1    1.732161
   2    0.401984
d  2    1.368133
   3   -0.631580
dtype: float64
'''

# The index is a MultiIndex of (outer, inner) tuples.
print(data.index)
'''
MultiIndex([('a', 1),
            ('a', 2),
            ('a', 3),
            ('b', 1),
            ('b', 2),
            ('b', 3),
            ('c', 1),
            ('c', 2),
            ('d', 2),
            ('d', 3)],
           )
'''

# Partial indexing: selecting one outer key leaves only the inner level.
print(data['b'])
'''
1    1.678755
2    0.029100
3   -1.121555
dtype: float64
'''

# Label slice over the outer level (both endpoints included).
print(data.loc['b':'c'])
'''
b  1    1.678755
   2    0.029100
   3   -1.121555
c  1    1.732161
   2    0.401984
dtype: float64
'''

# A list of outer keys selects several groups at once.
print(data.loc[['b', 'd']])
'''
b  1    1.678755
   2    0.029100
   3   -1.121555
d  2    1.368133
   3   -0.631580
dtype: float64
'''

# Select on the inner level: every outer key that has inner key 2.
print(data.loc[:, 2])
'''
a    1.886958
b    0.029100
c    0.401984
d    1.368133
dtype: float64
'''

# unstack() moves the inner level into the columns; (outer, inner) pairs that
# do not exist in the Series show up as NaN.
wide = data.unstack()
print(wide)
'''
          1         2         3
a  0.050239  1.886958 -1.366131
b  1.678755  0.029100 -1.121555
c  1.732161  0.401984       NaN
d       NaN  1.368133 -0.631580
'''

# stack() is the inverse operation and brings back the two-level Series.
print(wide.stack())
'''
a  1    0.050239
   2    1.886958
   3   -1.366131
b  1    1.678755
   2    0.029100
   3   -1.121555
c  1    1.732161
   2    0.401984
d  2    1.368133
   3   -0.631580
dtype: float64
'''
from pandas import Series, DataFrame, MultiIndex
import pandas as pd
import numpy as np

# A DataFrame can carry a hierarchical index on both axes: here the rows are
# keyed by (letter, number) and the columns by (state, color).
frame = DataFrame(np.arange(12).reshape((4,3)),
                  index=[['a','a','b','b'],[1,2,1,2]],
                  columns=[['Ohio','Ohio','Colorado'],
                           ['Green','Red','Green']])
print(frame)
'''
     Ohio     Colorado
    Green Red    Green
a 1     0   1        2
  2     3   4        5
b 1     6   7        8
  2     9  10       11
'''
# Name the levels so they can be referred to by name instead of position.
frame.index.names = ['key1','key2']
frame.columns.names = ['state','color']
print(frame)
'''
state      Ohio     Colorado
color     Green Red    Green
key1 key2                   
a    1        0   1        2
     2        3   4        5
b    1        6   7        8
     2        9  10       11
'''
# Partial column indexing: selecting an outer column key returns the
# sub-frame of its inner columns.
print(frame['Ohio'])
'''
color      Green  Red
key1 key2            
a    1         0    1
     2         3    4
b    1         6    7
     2         9   10
'''

# A MultiIndex can also be built explicitly and reused for the columns.
mIndex = MultiIndex.from_arrays([['Ohio','Ohio','Colorado'],['Green','Red','Green']],
                       names=['state','color'])
frame2 = DataFrame(np.arange(12).reshape((4,3)),
                   columns=mIndex)
print(frame2)
'''
state  Ohio     Colorado
color Green Red    Green
0         0   1        2
1         3   4        5
2         6   7        8
3         9  10       11
'''
# Reorder the index levels: swaplevel exchanges two levels, leaving the row
# order (and the data) untouched.
frame3 = frame.swaplevel('key1','key2')
print(frame3)
'''
state      Ohio     Colorado
color     Green Red    Green
key2 key1                   
1    a        0   1        2
2    a        3   4        5
1    b        6   7        8
2    b        9  10       11
'''
# Swap the levels, then sort the rows so the new outer level is grouped.
# `axis` must be passed by keyword: positional arguments to sort_index were
# deprecated and are rejected by pandas 2.0+.
frame4 = frame.swaplevel(0,1).sort_index(axis=0)
print(frame4)
'''
state      Ohio     Colorado
color     Green Red    Green
key2 key1                   
1    a        0   1        2
     b        6   7        8
2    a        3   4        5
     b        9  10       11
'''
# Summary statistics by level.
# The `level=` argument of sum() was deprecated in pandas 1.3 and removed in
# 2.0; grouping on the index level is the supported equivalent.
frame5 = frame.groupby(level='key2').sum()
print(frame5)
'''
state  Ohio     Colorado
color Green Red    Green
key2                    
1         6   8       10
2        12  14       16
'''
# Same aggregation along the columns. groupby(axis=1) is itself deprecated
# (pandas 2.1), so transpose, group on the row level, and transpose back.
frame6 = frame.T.groupby(level='color').sum().T
print(frame6)
'''
color      Green  Red
key1 key2            
a    1         2    1
     2         8    4
b    1        14    7
     2        20   10
'''
from pandas import DataFrame

# Plain DataFrame with a default integer index; columns 'c' and 'd' will be
# promoted to index levels below.
raw_columns = {
    'a': range(7),
    'b': range(7, 0, -1),
    'c': ['one'] * 3 + ['two'] * 4,
    'd': list(range(3)) + list(range(4)),
}
frame = DataFrame(raw_columns)
print(frame)
'''
   a  b    c  d
0  0  7  one  0
1  1  6  one  1
2  2  5  one  2
3  3  4  two  0
4  4  3  two  1
5  5  2  two  2
6  6  1  two  3
'''

# set_index turns one or more columns into the row index and returns a new
# DataFrame.
index_cols = ['c', 'd']
frame2 = frame.set_index(index_cols)
print(frame2)
'''
       a  b
c   d      
one 0  0  7
    1  1  6
    2  2  5
two 0  3  4
    1  4  3
    2  5  2
    3  6  1
'''

# By default those columns are removed from the DataFrame; drop=False keeps
# them as regular columns as well.
frame3 = frame.set_index(index_cols, drop=False)
print(frame3)
'''
       a  b    c  d
c   d              
one 0  0  7  one  0
    1  1  6  one  1
    2  2  5  one  2
two 0  3  4  two  0
    1  4  3  two  1
    2  5  2  two  2
    3  6  1  two  3
'''

# reset_index is the opposite of set_index: the hierarchical index levels
# are moved back into the columns.
frame4 = frame2.reset_index()
print(frame4)
'''
     c  d  a  b
0  one  0  0  7
1  one  1  1  6
2  one  2  2  5
3  two  0  3  4
4  two  1  4  3
5  two  2  5  2
6  two  3  6  1
'''
# 上一篇:Attribute-based Signature


# 下一篇:爬虫学习06用selenium爬取空间