groupby,分组

from operator import itemgetter
from itertools import groupby

# Sample records for the grouping examples: each one pairs a street address
# with a date string.
rows = [
    {'address': address, 'date': date}
    for address, date in (
        ('5412 N CLARK', '07/01/2012'),
        ('5148 N CLARK', '07/04/2012'),
        ('5800 E 58TH', '07/02/2012'),
        ('2122 N CLARK', '07/03/2012'),
        ('5645 N RAVENSWOOD', '07/02/2012'),
        ('1060 W ADDISON', '07/02/2012'),
        ('4801 N BROADWAY', '07/01/2012'),
        ('1039 W GRANVILLE', '07/04/2012'),
    )
]
# Group the records by date.
# groupby() only looks at runs of consecutive items, so the rows are sorted
# by the same key first to bring equal dates next to each other.
by_date = itemgetter('date')
rows_item_by_date = sorted(rows, key=by_date)

for date, same_date_rows in groupby(rows_item_by_date, key=by_date):
    print(date)
    for row in same_date_rows:
        print(' ', row)

print(' ')

# Group with a defaultdict instead.
# If memory use is not much of a concern, this tends to run somewhat faster
# than sorting first and then iterating with groupby().

from collections import defaultdict

rows_item_by_date2 = defaultdict(list)
for record in rows:
    # Looking up a date that is not yet present creates an empty list for it.
    day = record['date']
    rows_item_by_date2[day].append(record)

print(rows_item_by_date2)
print(rows_item_by_date2['07/01/2012'])

上一篇:数据分析04 /pandas的高级操作


下一篇:python set();lambda();zip();groupby()