没有找到 MongoDB 自带的去重功能,于是自己写了一个:利用聚合的分组计数功能,找出 count>1 的分组,并删除其中多余的重复记录。
import pymongo


def mongo_dup_remove(
    db: str,
    col: str,
    *args: str,
    uri: str = "mongodb://localhost:27017/",
) -> None:
    """Remove duplicate documents from a MongoDB collection.

    Documents are grouped by the values of the fields named in ``args``;
    for every group holding more than one document, all but one document
    are deleted. No sort order is applied, so which document of a group
    survives is unspecified.

    Args:
        db: Name of the database.
        col: Name of the collection to deduplicate.
        *args: One or more field names that together form the duplicate key.
        uri: MongoDB connection URI (keyword-only). Defaults to the local
            server the function originally had hard-coded.

    Raises:
        ValueError: If no field names are given. An empty key would put the
            whole collection into a single group and delete all but one
            document.
    """
    if not args:
        raise ValueError("at least one field name is required to detect duplicates")

    pipeline = [
        # Inclusion projection; `_id` is kept because it is used for deletion.
        {"$project": {name: 1 for name in args}},
        {
            "$group": {
                "_id": {name: f"${name}" for name in args},
                # Collect the member ids so duplicates can be removed by
                # `_id`. NOTE: the id list of one group has to fit into a
                # single 16 MB BSON document.
                "ids": {"$push": "$_id"},
                "count": {"$sum": 1},
            }
        },
        {"$match": {"count": {"$gt": 1}}},
    ]

    # The context manager closes the client (and its connections) on exit.
    with pymongo.MongoClient(uri) as client:
        collection = client[db][col]
        for group in collection.aggregate(pipeline, allowDiskUse=True):
            key = group["_id"]
            count = group["count"]
            # Keep the first collected document and delete the rest by `_id`.
            # Deleting by `_id` instead of re-querying with the group key
            # avoids touching documents outside the group when the key does
            # not round-trip as an equality filter (e.g. a key field that is
            # absent from some documents), and needs one round trip per group.
            collection.delete_many({"_id": {"$in": group["ids"][1:]}})
            print(f'已删除:{key},重复数:{count}')