批量修改文件编码

在日常工作中,很多人习惯用 VS Code 编写代码,文件却以默认的非 UTF-8 编码方式保存。这样的文件拿到 Linux 环境下打开时,其中的汉字会显示成乱码,怎么也看不懂。如果成百上千个文件都是这种情况,该怎么办呢?这里提供一个小工具,把文件从原有编码批量转换为 UTF-8 格式,解决跨平台环境下字符编码不一致带来的问题。

#!/bin/bash
#file    IconvFileEncoding2utf-8.sh
#brief   Convert every *.h, *.c and *.cpp file below the current directory
#        to UTF-8 in place, then normalise its line endings with dos2unix.
#author  (*@*.com.cn)
#date    2019-09-23 15:22:08
#note    Needs find, file, iconv, vim and dos2unix on PATH. iconv is only
#        asked for its charset list; the re-encoding itself is done by Vim.
#        Writes ic.log (problem report) and encodingTransform.vim (the Vim
#        key script) into the directory the script is started from.
#history
#warning Files are rewritten in place. A "<file>_BK" copy is made before each
#        conversion and is removed only when Vim exits successfully.

#######################################
# Tell whether iconv knows a charset.
# Globals:   ICONV_CHARSETS (read/written) - cached, space-delimited,
#            upper-cased output of "iconv -l"
# Arguments: $1 - charset name (any letter case)
# Returns:   0 if the whole name is listed by iconv, 1 otherwise
#######################################
iconv_supports() {
  local wanted=${1^^}

  # An empty name would match anything; treat it as unsupported.
  [[ -n "$wanted" ]] || return 1

  # Query iconv only once per run instead of once per file.
  if [[ -z "${ICONV_CHARSETS:-}" ]]; then
    ICONV_CHARSETS=" $(iconv -l 2>/dev/null \
      | tr -s ',/[:space:]' ' ' \
      | tr '[:lower:]' '[:upper:]') "
  fi

  [[ "$ICONV_CHARSETS" == *" ${wanted} "* ]]
}

#######################################
# Re-encode one file to UTF-8 with Vim and convert its line endings.
# Globals:   ICONV_LOG (read) - log file that collects problems
#            encodingFd (read) - Vim key script run against the file
# Arguments: $1 - path of the file to convert
# Outputs:   "<path> already utf-8" on stdout for files that need no work
# Returns:   0 when the file was converted or skipped, 1 on failure
#######################################
convert_file() {
  local src=$1
  local backup="${src}_BK"
  local encoding vim_status

  # "file -bi" prints e.g. "text/x-c; charset=iso-8859-1"; keep the charset.
  encoding=$(file -bi -- "$src")
  encoding=${encoding##*charset=}
  encoding=${encoding^^}

  if [[ "$encoding" == "UTF-8" ]]; then
    echo "${src} already utf-8"
    return 0
  fi

  if ! iconv_supports "$encoding"; then
    echo "${src} ${encoding}:iconv not support." >> "${ICONV_LOG}"
    return 0
  fi

  # Never let Vim rewrite a file that has no safety copy.
  if ! cp -fp -- "$src" "$backup"; then
    echo "${src} ${encoding}:backup to ${backup} failed, skipped." >> "${ICONV_LOG}"
    return 1
  fi

  vim -s "${encodingFd}" -- "$src"
  vim_status=$?

  # Line endings are fixed before the timestamp is restored; doing it the
  # other way round would stamp the file with the current time again.
  dos2unix -- "$src"

  if (( vim_status != 0 )); then
    echo "vim -s ${encodingFd} ${src} (${encoding} -> UTF-8) err! backup kept: ${backup}" >> "${ICONV_LOG}"
    return 1
  fi

  touch -r "$backup" -- "$src"
  rm -f -- "$backup"
}

#######################################
# Entry point: prepare the log and the Vim key script, collect the source
# files below the current directory and convert them one by one.
# Globals:   CUR_DIR, ICONV_LOG, encodingFd, ICONV_CHARSETS,
#            C_CPP_HEAD_FILES (all written)
# Arguments: none
# Outputs:   progress messages on stdout
#######################################
main() {
  CUR_DIR=${PWD}
  echo "cur dir:$CUR_DIR"

  ICONV_LOG="${CUR_DIR}/ic.log"
  encodingFd="${CUR_DIR}/encodingTransform.vim"
  ICONV_CHARSETS=''

  # Start every run with a fresh log.
  echo "" > "${ICONV_LOG}"

  # Keys fed to Vim: re-encode the buffer as UTF-8, then write and quit.
  if [[ ! -f "${encodingFd}" ]]; then
    printf '%s\n' ':set fileencoding=utf-8' ':wq!' > "${encodingFd}"
  fi

  # NUL-delimited so that blanks, colons or newlines in a path survive.
  # The list is read completely before any conversion starts, which also
  # keeps find's output away from Vim's standard input.
  C_CPP_HEAD_FILES=()
  local path
  while IFS= read -r -d '' path; do
    C_CPP_HEAD_FILES+=("$path")
  done < <(find . -type f \( -name '*.h' -o -name '*.c' -o -name '*.cpp' \) -print0)

  echo "the Number of C and CPP files" "${#C_CPP_HEAD_FILES[@]}"

  for path in "${C_CPP_HEAD_FILES[@]}"; do
    convert_file "$path"
  done
}

main "$@"

 

上一篇:PHP导出csv 注意事项


下一篇:爬虫spider