自己尝试着从头写了一个自动换行算法的测试工具,支持UTF-8。实现的重点是识别UTF-8多字节字符的首字节,并据此确定该字符所占的全部字节,保证不会在一个字符的中间断行。
代码如下:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define LINE_CHARACTERS_COUNT 30
long GetFileSize(const char* filename);
/**
 * Returns non-zero when c is one of the ASCII bytes after which a line may
 * be wrapped (space , ; . !).
 */
static int IsWordSplitter(unsigned char c)
{
    return c == ' ' || c == ',' || c == ';' || c == '.' || c == '!';
}

/**
 * Returns the number of bytes announced by a UTF-8 lead byte:
 * 110xxxxx -> 2, 1110xxxx -> 3, 11110xxx -> 4.
 * Every other byte (ASCII, a stray continuation byte, an invalid lead byte)
 * counts as 1 so the scanner always makes progress.
 */
static long Utf8SequenceLength(unsigned char c)
{
    if ((c & 0xE0) == 0xC0)
        return 2;
    if ((c & 0xF0) == 0xE0)
        return 3;
    if ((c & 0xF8) == 0xF0)
        return 4;
    return 1;
}

/**
 * Writes nLength bytes starting at pData to stdout, followed by one newline.
 * The bytes are written straight from the caller's buffer, so no
 * intermediate line buffer (and no NUL terminator) is required.
 */
static void WriteLine(const unsigned char *pData, long nLength)
{
    if (nLength > 0)
        fwrite(pData, 1, (size_t)nLength, stdout);
    putchar('\n');
}

/**
 * Line-wrap test tool: reads a UTF-8 text file and prints it re-wrapped.
 *
 * Usage: linewrap InputFile [characters_each_line]
 *
 * The wrap width is measured in bytes (default LINE_CHARACTERS_COUNT).
 * A line is only broken after a word splitter or after a complete multi-byte
 * UTF-8 character; a run without such a position is never split, so it may
 * exceed the width. Hard line breaks (LF, CR, CRLF) in the input always end
 * the current output line.
 *
 * @return 0 on success, 1 on bad arguments, 2 if the file cannot be opened,
 *         3 if its size cannot be determined, 4 if memory is exhausted.
 */
int main (int argc, const char * argv[])
{
    unsigned char *pBuffer = NULL;
    FILE *fp = NULL;
    long nLineWarp = LINE_CHARACTERS_COUNT;
    long nFileSize;
    long nLength;
    long nStart = 0;         /* first byte of the line not yet written */
    long nLastWordPos = -1;  /* last byte after which a break is allowed */
    long i;

    /* Check the arguments. */
    if (argc < 2)
    {
        printf("Invalid parameters!\n");
        printf("Usage: linewrap InputFile [characters_each_line]\n");
        printf("Try it again!\n");
        return 1;
    }

    /* Optional wrap width: must be a complete, positive decimal number. */
    if (argc == 3)
    {
        char *pEnd = NULL;
        long nValue = strtol(argv[2], &pEnd, 10);

        if (pEnd == argv[2] || *pEnd != '\0' || nValue <= 0)
        {
            printf("Invalid parameters!\n");
            printf("Usage: linewrap InputFile [characters_each_line]\n");
            printf("Try it again!\n");
            return 1;
        }
        nLineWarp = nValue;
    }

    /* Open first so that a missing file is reported as "cannot open". */
    fp = fopen(argv[1], "rb");
    if (!fp)
    {
        printf("Cannot open %s!\n", argv[1]);
        return 2;
    }

    /* A negative size is treated as a failure of GetFileSize. */
    nFileSize = GetFileSize(argv[1]);
    if (nFileSize < 0)
    {
        fclose(fp);
        printf("Cannot determine the size of %s!\n", argv[1]);
        return 3;
    }

    /* +1 keeps the request non-zero for an empty file. */
    pBuffer = malloc((size_t)nFileSize + 1);
    if (!pBuffer)
    {
        fclose(fp);
        printf("Out of memory!\n");
        return 4;
    }

    /* Read the data and close the file right away. */
    nLength = (long)fread(pBuffer, 1, (size_t)nFileSize, fp);
    fclose(fp);

    printf("=======OUTPUT=======\n");

    /* Wrap and print. */
    for (i = 0; i < nLength; i++)
    {
        const unsigned char ch = pBuffer[i];

        if (ch == '\n' || ch == '\r')
        {
            /*
             * Hard line break: write the pending bytes without the break
             * byte itself, because WriteLine appends the newline. The LF of
             * a CRLF pair is swallowed since its CR already ended the line.
             */
            if (!(ch == '\n' && i > 0 && pBuffer[i - 1] == '\r'))
                WriteLine(pBuffer + nStart, i - nStart);
            nStart = i + 1;
            nLastWordPos = i;
            continue;
        }

        if (IsWordSplitter(ch))
        {
            nLastWordPos = i;
        }
        else
        {
            const long nSeqLen = Utf8SequenceLength(ch);

            if (nSeqLen > 1)
            {
                /*
                 * Consume only real continuation bytes (10xxxxxx) and never
                 * step past the end of the data, so a truncated or malformed
                 * sequence cannot swallow the following text.
                 */
                long nLast = i;

                while (nLast + 1 < nLength && nLast + 1 < i + nSeqLen &&
                       (pBuffer[nLast + 1] & 0xC0) == 0x80)
                    nLast++;
                i = nLast;
                /* A break is allowed after every complete multi-byte character. */
                nLastWordPos = i;
            }
        }

        /* Width reached: break after the last allowed position, if there is one. */
        if (i - nStart >= nLineWarp && nLastWordPos > nStart)
        {
            WriteLine(pBuffer + nStart, nLastWordPos - nStart + 1);
            nStart = nLastWordPos + 1;
        }
    }

    /* Whatever is left after the last break forms the final line. */
    if (nStart < nLength)
        WriteLine(pBuffer + nStart, nLength - nStart);

    free(pBuffer);
    return 0;
}
///!Get the file size
/**
 * Returns the size in bytes of the file named by filename, or -1 when the
 * size cannot be determined (NULL name, file cannot be opened, seek failed).
 *
 * Different systems offer different ways to do this (on Linux, stat() can be
 * used); this version relies only on standard C: open the file in binary
 * mode, seek to the end and read back the position. Seeking to the end of a
 * binary stream is not guaranteed by the C standard on every platform, so a
 * platform-specific implementation may replace this one where needed.
 */
long GetFileSize(const char* filename)
{
    FILE *fp;
    long nSize = -1;

    if (filename == NULL)
        return -1;

    fp = fopen(filename, "rb");
    if (fp == NULL)
        return -1;

    if (fseek(fp, 0L, SEEK_END) == 0)
        nSize = ftell(fp);  /* ftell itself returns -1L on failure */

    fclose(fp);
    return nSize;
}