昨天 Python 发布了 3.5 版本,新增了 os.scandir。
根据官方文档(Docs),该 API 比 os.listdir 更快:
which speeds it up by 3-5 times on POSIX systems and by 7-20 times on Windows systems
以前因为目录太大(文件数过万),os.listdir 又太慢,我自己写了一个 listdir,在这里发布一下(仅支持 Linux):
#!/usr/bin/python
import os
import ctypes
from ctypes.util import find_library
# Handle to the C library that provides opendir/readdir/closedir.
# The library name is lowercase 'c' (libc); if find_library() cannot locate
# it and returns None, CDLL(None) still exposes the symbols already loaded
# into the current process.  use_errno=True lets callers read the C errno
# of a failed call through ctypes.get_errno().
clib = ctypes.CDLL(find_library('c'), use_errno=True)
class c_dir(ctypes.Structure):
    """Opaque placeholder for the C ``DIR`` directory stream.

    No fields are declared; the type exists only so that a distinct
    pointer type can be built from it.
    """
class c_dirent(ctypes.Structure):
    """ctypes layout of the Linux ``struct dirent`` filled in by readdir(3).

    The inode number and the file type are unsigned in the C definition
    (``ino_t`` / ``unsigned char``), so they are declared with unsigned
    ctypes types; declaring them signed would surface very large inode
    numbers as negative integers.
    """
    _fields_ = (
        ('d_ino', ctypes.c_ulong),      # inode number (unsigned)
        ('d_off', ctypes.c_long),       # offset
        ('d_reclen', ctypes.c_ushort),  # record length
        ('d_type', ctypes.c_ubyte),     # file type (unsigned char)
        # NOTE(review): larger than glibc's d_name[256]; size kept from the
        # original declaration.  Only read instances through pointers
        # returned by readdir() -- do not allocate one and hand it to C.
        ('d_name', ctypes.c_char * 4096),
    )
# Pointer types used in the C prototypes declared below.
c_dir_p = ctypes.POINTER(c_dir)
c_dirent_p = ctypes.POINTER(c_dirent)

# opendir: takes a char* path and returns a directory-stream pointer.
opendir = clib.opendir
opendir.restype = c_dir_p
opendir.argtypes = [ ctypes.c_char_p ]

# readdir: takes a directory-stream pointer and returns a dirent pointer.
readdir = clib.readdir
readdir.restype = c_dirent_p
readdir.argtypes = [ c_dir_p ]

# closedir: takes a directory-stream pointer and returns an int.
closedir = clib.closedir
closedir.restype = ctypes.c_int
closedir.argtypes = [ c_dir_p ]
def countdir(path):
    """Count the entries of a directory by walking it with readdir().

    Args:
        path: Directory to scan, given as ``str`` or ``bytes``.

    Returns:
        A dict with the keys
        ``"count"``: number of entries read minus 2 (this assumes both
        ``.`` and ``..`` were returned by readdir);
        ``"total_filename"``: summed byte length of every entry name read;
        ``"total_metasize"``: summed ``d_reclen`` of every entry read;
        ``"dirsize"``: ``os.path.getsize(path)``.

    Raises:
        ValueError: if ``path`` is not a directory.
        OSError: if ``opendir`` returns NULL for ``path``.
    """
    if not os.path.isdir(path):
        # Decode bytes paths so the message can always be built as text.
        shown = path if isinstance(path, str) else os.fsdecode(path)
        raise ValueError('arg error, not a dir: ' + shown)

    # opendir is declared with a c_char_p argument, which only takes bytes.
    raw_path = path if isinstance(path, bytes) else os.fsencode(path)
    dirfd = opendir(raw_path)
    if not dirfd:
        # A NULL stream must never reach readdir(): that would crash the
        # interpreter instead of raising.
        err = ctypes.get_errno()
        if err:
            raise OSError(err, os.strerror(err), path)
        raise OSError('opendir failed: %r' % (path,))

    total_num, total_filename, total_metasize = 0, 0, 0
    try:
        while True:
            entry = readdir(dirfd)
            if not entry:  # NULL pointer: no more entries
                break
            record = entry.contents
            total_filename += len(record.d_name)
            total_metasize += record.d_reclen
            total_num += 1
    finally:
        closedir(dirfd)
    return {
        "count": total_num - 2,
        "total_filename": total_filename,
        "total_metasize": total_metasize,
        "dirsize": os.path.getsize(path),
    }
def listdir(path):
    """Lazily yield one dict per directory entry read with readdir().

    The two special entries ``.`` and ``..`` are included in the output.
    Because this is a generator, validation and opendir happen on the first
    iteration, not when the function is called.

    Args:
        path: Directory to scan, given as ``str`` or ``bytes``.

    Yields:
        Dicts with the keys ``"name"`` (entry name; ``bytes`` when ``path``
        is ``bytes``, otherwise decoded with ``os.fsdecode``), ``"inode"``
        (``d_ino``) and ``"metasize"`` (``d_reclen``).

    Raises:
        ValueError: if ``path`` is not a directory.
        OSError: if ``opendir`` returns NULL for ``path``.
    """
    if not os.path.isdir(path):
        # Decode bytes paths so the message can always be built as text.
        shown = path if isinstance(path, str) else os.fsdecode(path)
        raise ValueError('arg error, not a dir: ' + shown)

    # opendir is declared with a c_char_p argument, which only takes bytes.
    want_bytes = isinstance(path, bytes)
    raw_path = path if want_bytes else os.fsencode(path)
    dirfd = opendir(raw_path)
    if not dirfd:
        # A NULL stream must never reach readdir(): that would crash the
        # interpreter instead of raising.
        err = ctypes.get_errno()
        if err:
            raise OSError(err, os.strerror(err), path)
        raise OSError('opendir failed: %r' % (path,))

    try:
        while True:
            entry = readdir(dirfd)
            if not entry:  # NULL pointer: no more entries
                break
            # Copy the values out before yielding; the next readdir() call
            # may reuse the underlying buffer.
            record = entry.contents
            raw_name = record.d_name
            yield {"name": raw_name if want_bytes else os.fsdecode(raw_name),
                   "inode": record.d_ino,
                   "metasize": record.d_reclen}
    finally:
        # Also runs when the generator is closed before exhaustion.
        closedir(dirfd)
if __name__ == '__main__':
    # Script entry point: print the countdir() summary for the directory
    # named by the first command-line argument.
    import sys

    if len(sys.argv) < 2:
        # Exit with a readable message instead of an IndexError traceback.
        sys.exit('usage: %s <directory>' % sys.argv[0])
    path = sys.argv[1]
    print( countdir(path) )
    # Alternative per-entry listing, kept for reference:
    # total = 0
    # for entry in listdir(path):
    #     print(entry['name'], entry['metasize'])
    #     total += entry['metasize']
    # print('total:', total, 'dir size: ', os.path.getsize(path))