临时需求:需要把两年的 CSV 文件转换编码格式,因为 UTF-8 文件用 Excel 打开是乱码!24 个目录、700 多个文件,手动处理太浪费时间……于是想到了批量处理方案:
# -*- encoding: utf-8 -*-
"""
fc_test.py
Created on 2020/3/9 0009 下午 5:06
@author: LHX
"""
import os
import sys
import codecs
import chardet
# Re-encode a single file from its source encoding to the target encoding
# (UTF-8 by default).
def file_convert(path1, file_name, file, in_code="GBK", out_code="UTF-8"):
    """Re-encode one file and write the result into ``path1 + '_convert'``.

    The output directory is NOT created here: it must already exist and is
    named after the input directory with a ``_convert`` suffix.

    Args:
        path1: Directory containing the source file; the output directory
            name is derived from it by appending ``_convert``.
        file_name: Full path of the source file to read.
        file: Bare file name used for the output file.
        in_code: Encoding of the source file. A falsy value (for example
            ``None`` from a failed detection) falls back to ``"GBK"``.
        out_code: Encoding used for the output file.

    I/O errors, undecodable/unencodable content and unknown codec names are
    reported on stdout instead of being raised, so a single bad file does
    not abort a batch run.
    """
    out_path = path1 + '_convert'
    print("==111", out_path)

    # Encoding detection can yield None; use the documented default instead
    # of silently depending on the platform's locale encoding.
    if not in_code:
        in_code = "GBK"

    try:
        # Work on raw bytes so line endings are carried over untouched.
        with open(file_name, 'rb') as f_in:
            raw = f_in.read()

        # Decode and encode fully in memory BEFORE opening the output, so a
        # failed conversion never leaves a truncated output file behind.
        encoded = raw.decode(in_code).encode(out_code)

        # The context manager guarantees the output is flushed and closed.
        with open(os.path.join(out_path, file), 'wb') as f_out:
            f_out.write(encoded)
    except IOError as err:
        print("I/O error: {0}".format(err))
    except (UnicodeError, LookupError) as err:
        print("Encoding error in {0}: {1}".format(file_name, err))
import os
# Walk the input directory recursively and convert every file found.
def list_folders_files(path1):
    """Recursively convert every file under ``path1`` to UTF-8.

    Sub-directories are handled first (depth-first), then the files that
    sit directly inside ``path1``. For each file the raw bytes are read,
    the encoding is guessed with ``chardet.detect`` and the file is passed
    to ``file_convert`` together with the detected encoding.

    Directories whose name ends with ``_convert`` are skipped: that is the
    suffix ``file_convert`` appends to build its output directory, so
    descending into them would re-process already converted output.

    Args:
        path1: Directory to process.
    """
    entries = os.listdir(path1)

    # Descend into sub-directories first, leaving output directories alone.
    for name in entries:
        sub_path = os.path.join(path1, name)
        if os.path.isdir(sub_path) and not name.endswith('_convert'):
            list_folders_files(sub_path)

    # Then convert the regular files of this directory.
    for name in entries:
        file_name = os.path.join(path1, name)
        if not os.path.isfile(file_name):
            continue
        with open(file_name, "rb") as f_in:
            data = f_in.read()
        # The handle is closed before converting; only the bytes are needed
        # for detection.
        code_type = chardet.detect(data)['encoding']
        file_convert(path1, file_name, name, code_type, 'UTF-8')
# Choose the input directory and run the conversion. The first command-line
# argument, when given, overrides the built-in default directory.
path = sys.argv[1] if len(sys.argv) > 1 else 'E:\\tmp\\2018_1\\'
list_folders_files(path)