# encoding=utf-8
"""Count the non-missing ``max_speed`` values per (class, order) group.

Builds a small example DataFrame of animals, groups it by the ``class`` and
``order`` columns, counts the non-NaN ``max_speed`` entries in each group and
flattens the result into a DataFrame with a default integer index.
"""
import numpy as np
import pandas as pd

# Example data: one row per animal; the monkey's max_speed is missing (NaN).
df = pd.DataFrame(
    [
        ("bird", "Falconiformes", 389.0),
        ("bird", "Psittaciformes", 24.0),
        ("mammal", "Carnivora", 80.2),
        ("mammal", "Primates", np.nan),
        ("mammal", "Carnivora", 58),
    ],
    index=["falcon", "parrot", "lion", "monkey", "leopard"],
    columns=("class", "order", "max_speed"),
)

# Grouping pipeline (the statement numbered 15 in the original listing).
# The result is bound to a name so it can be inspected or exported later
# instead of being computed and thrown away.
speed_counts = (
    df.groupby(by=["class", "order"])["max_speed"]
    .count()  # non-NaN values per group, so mammal/Primates yields 0
    .rename("speed")  # name the Series so the resulting column is "speed"
    .to_frame()  # Series -> single-column DataFrame
    .reset_index()  # move the group keys out of the MultiIndex into columns
)
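第15行(最后一条语句)说明:groupby 按 class 和 order 两个字段分组;count() 统计每个分组中 max_speed 的非空(非 NaN)值的个数,因此 Primates 分组的结果为 0;rename('speed') 将结果 Series 的名称改为 speed(转为 DataFrame 后即为列名);to_frame() 将 Series 转为 DataFrame;reset_index() 把分组键从索引还原为普通列,并重建默认的整数索引,方便导出到 Excel。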
重建索引前后的 index 对比(下面先列出 reset_index 之后的结果,再列出 reset_index 之前的结果):
RangeIndex(start=0, stop=4, step=1)
MultiIndex([( 'bird', 'Falconiformes'), ( 'bird', 'Psittaciformes'), ('mammal', 'Carnivora'), ('mammal', 'Primates')], names=['class', 'order'])