在日常工作中,很多人喜欢使用 VS Code 编写代码,而文件又是以默认的非 UTF-8 编码方式保存的。这样一来,在 Linux 环境下打开含有汉字的文件时,显示出来的内容就是一堆乱码,怎么也看不懂。遇到这种情况该怎么办呢?如果成百上千个文件都是这样,又该怎么办呢?这里提供一个小工具,可以把文件从原有编码批量转换为 UTF-8 格式,解决跨平台环境下字符编码不一致带来的问题。
#!/bin/bash
#file    IconvFileEncoding2utf-8.sh
#note    .
#brief   Convert every C/C++ source and header file (*.h, *.c, *.cpp) below
#        the current directory to UTF-8 with Unix line endings, in place.
#author  (*@*.com.cn)
#date    2019-09-23 15:22:08
#note    Requires file(1), iconv(1), vim(1) and dos2unix(1) on PATH.
#        Creates two files in the current directory:
#          ic.log                - files that were skipped or failed
#          encodingTransform.vim - vim command script used for the conversion
#history
#warning Files are rewritten in place. A "<file>_BK" copy is made before each
#        conversion and is removed only when vim exits successfully.

#######################################
# Print the charset file(1) reports for a file, upper-cased
# (e.g. "UTF-8", "ISO-8859-1", "US-ASCII").
# Arguments: $1 - path of the file to inspect
# Outputs:   the charset name on stdout (empty if file(1) printed nothing)
#######################################
detect_charset() {
  file -bi -- "$1" \
    | sed -e 's/.*[ ]charset=//' \
    | tr '[:lower:]' '[:upper:]'
}

#######################################
# Convert one file to UTF-8 with vim, then normalise its line endings.
# Arguments: $1 - file to convert
#            $2 - charset detected for the file (used in the log only)
#            $3 - vim command script (":set fileencoding=utf-8" + ":wq!")
#            $4 - log file that receives failure messages
# Returns:   0 on success, 1 if the backup or the vim conversion failed
#######################################
convert_file() {
  local src=$1
  local charset=$2
  local vim_script=$3
  local log=$4
  local backup="${src}_BK"
  local vim_status

  # Never touch the original unless a backup copy exists.
  if ! cp -rfp -- "${src}" "${backup}"; then
    echo "${src} backup to ${backup} failed, skipped." >> "${log}"
    return 1
  fi

  # NOTE(review): vim chooses the *source* encoding from its own
  # 'fileencodings' option, not from ${charset} -- confirm the local vim
  # configuration recognises the encodings present in the tree.
  # Equivalent iconv form, kept for reference:
  #   iconv -f "${charset}" -t UTF-8 "${src}" -o "${src}_UTF-8"
  vim -s "${vim_script}" -- "${src}"
  vim_status=$?

  # Line endings are normalised whether or not vim succeeded. This runs
  # before the timestamp is restored because dos2unix rewrites the file
  # (presumably bumping its mtime unless -k is given).
  if ! dos2unix -- "${src}"; then
    echo "${src} dos2unix failed." >> "${log}"
  fi

  if (( vim_status != 0 )); then
    # The backup is deliberately left in place so the original can be restored.
    echo "iconv -f ${charset} -t UTF-8 ${src} -o ${src}_UTF-8 err!" >> "${log}"
    return 1
  fi

  # Give the converted file the original modification time, then drop the backup.
  touch -r "${backup}" -- "${src}"
  rm -f -- "${backup}"
  return 0
}

#######################################
# Entry point: scan the current directory tree and convert every C/C++
# file that is not already UTF-8.
# Outputs:   progress on stdout; skipped/failed files in ./ic.log
#######################################
main() {
  local cur_dir=${PWD}
  local iconv_log="${cur_dir}/ic.log"
  local vim_script="${cur_dir}/encodingTransform.vim"
  local -a sources=()
  local path charset supported_charsets

  echo "cur dir:${cur_dir}"
  echo "" > "${iconv_log}"

  # Create the vim command script once; an existing one is reused as-is.
  if [[ ! -f "${vim_script}" ]]; then
    printf '%s\n' ':set fileencoding=utf-8' ':wq!' > "${vim_script}"
  fi

  # NUL-delimited so paths containing spaces, colons or newlines survive intact.
  while IFS= read -r -d '' path; do
    sources+=("${path}")
  done < <(find . -type f -regex '.*\(\.h\|\.c\|\.cpp\)' -print0)

  echo "the Number of C and CPP files" "${#sources[@]}"

  # Ask iconv for its charset list once instead of once per file.
  supported_charsets=$(iconv -l 2>/dev/null)

  for path in "${sources[@]}"; do
    charset=$(detect_charset "${path}")

    if [[ "${charset}" == "UTF-8" ]]; then
      echo "${path} already utf-8"
      continue
    fi

    # An empty charset (detection failed) must not be treated as supported;
    # -F keeps the charset name from being interpreted as a regex.
    if [[ -z "${charset}" ]] \
      || ! grep -qF -- "${charset}" <<< "${supported_charsets}"; then
      echo "${path} ${charset}:iconv not support." >> "${iconv_log}"
      continue
    fi

    convert_file "${path}" "${charset}" "${vim_script}" "${iconv_log}"
  done
}

main "$@"