最近开发的语音识别项目经常碰到转码的问题,各种宽窄字节转换:ANSI、UTF-8、宽字节……代码写得冗长、繁琐,可维护性太差了。于是决定自己写一个能直接使用各种编码的字符串类,功能不多,但是也够用。由于 string 和 wstring 在多线程下共享数据会出问题,这里只用最基础的 char 和 wchar_t。
基于Windows平台,代码如下
/* ************************************************** Title: 自定义字符串类 ************************************************** Date:2014/12/14 ************************************************** author:刘旭 ************************************************** Blog:http://blog.csdn.net/liuxucoder ************************************************** */ #ifndef __YKSTRING #define __YKSTRING #include <cstring> #include <new> #include <tchar.h> #include <strsafe.h> #include <Windows.h> #define MAX_COUNT 1024000 typedef unsigned char BYTE; //十进制转十六进制 inline BYTE Int_to_Hex(const BYTE src) { return src > 9 ? (src+55) : (src+48); }
//十六进制转十进制 inline BYTE Hex_to_Int(const BYTE &x) { return isdigit(x) ? x-'0' : x-'A'+10; } /* *@自定义字符串类 */ class CYkString { public: /* *@目的: 字符串转码,由 Ansi 转为 wchar_t *@参数: const char* szAnsi 转码前 Ansi 字符串 *@参数: wchar_t* szWchar 转码后 wchar_t 字符串 *@返回值: bool 成功返回true,失败返回false */ bool Ansi_to_Wchar(const char* szAnsi,wchar_t*& szWchar) { bool res = false; size_t nLen = MultiByteToWideChar(CP_ACP, NULL, szAnsi, -1, NULL, 0); wchar_t* strTemp = NULL; nLen = (nLen+1)*sizeof(wchar_t); __try{ if(NULL == (szWchar = new wchar_t[nLen])){ __leave; } if(NULL == (strTemp = new wchar_t[nLen])){ __leave; } ZeroMemory(szWchar, nLen); ZeroMemory(strTemp, nLen); if(0 == MultiByteToWideChar(CP_ACP, NULL, szAnsi, -1, strTemp, nLen)){ __leave; } if(S_OK != StringCchCopyW(szWchar, nLen, strTemp)){ __leave; } res = true; } __finally{ if(NULL != strTemp){ delete []strTemp; strTemp = NULL; } } return res; } /* *@目的: 字符串转码,由 Utf-8 转为 wchar_t *@参数: const char* szUtf8 转码前 Utf-8 字符串 *@参数: wchar_t* szWchar 转码后 wchar_t 字符串 *@返回值: bool 成功返回true,失败返回false */ bool Utf8_to_Wchar(const char* szUtf8,wchar_t*& szWchar) { bool res = false; size_t nLen = MultiByteToWideChar(CP_UTF8, NULL, szUtf8, -1, NULL, 0); wchar_t* strTemp =NULL; nLen = (nLen+1)*sizeof(wchar_t); __try{ if(NULL == (szWchar = new wchar_t[nLen])){ __leave; } if(NULL == (strTemp = new wchar_t[nLen])){ __leave; } ZeroMemory(szWchar, nLen); ZeroMemory(strTemp, nLen); if(0 == MultiByteToWideChar(CP_UTF8, NULL, szUtf8, -1, strTemp, nLen)){ __leave; } if(S_OK != StringCchCopyW(szWchar, nLen, strTemp)){ __leave; } res = true; } __finally{ if(NULL != strTemp){ delete []strTemp; strTemp = NULL; } } return res; } /* *@目的: 字符串转码,由 wchar_t 转为 Ansi *@参数: const wchar_t* szWchar 转码前 wchar_t 字符串 *@参数: char* szAnsi 转码后 Ansi 字符串 *@返回值: bool 成功返回true,失败返回false */ bool Wchar_to_Ansi(const wchar_t* szWchar,char*& szAnsi) { bool res = false; size_t nLen = 0; char *strTemp = NULL; StringCchLengthW(szWchar, MAX_COUNT, &nLen); nLen = (nLen+1)*sizeof(wchar_t); 
__try{ if(NULL == (szAnsi = new char[nLen])){ __leave; } if(NULL == (strTemp = new char[nLen])){ __leave; } ZeroMemory(szAnsi, nLen); ZeroMemory(strTemp, nLen); if( 0 == WideCharToMultiByte(CP_ACP, 0, szWchar, -1, strTemp, nLen, NULL, NULL)){ __leave; } if(S_OK != StringCchCopyA(szAnsi, nLen, strTemp)){ __leave; } res = true; } __finally{ if(NULL != strTemp){ delete []strTemp; strTemp = NULL; } } return res; } /* *@目的: 字符串转码,由 wchar_t 转为 Utf-8 *@参数: const wchar_t* szWchar 转码前 wchar_t 字符串 *@参数: char* szUtf8 转码后 Utf-8 字符串 *@返回值: bool 成功返回true,失败返回false */ bool Wchar_to_Utf8(const wchar_t* szWchar,char*& szUtf8) { bool res = false; size_t nLen = 0; char *strTemp = NULL; StringCchLengthW(szWchar, MAX_COUNT, &nLen); nLen = (nLen+1)*2*sizeof(wchar_t); __try{ if(NULL == (szUtf8 = new char[nLen])){ __leave; } if(NULL == (strTemp = new char[nLen])){ __leave; } ZeroMemory(szUtf8, nLen); ZeroMemory(strTemp, nLen); if( 0 == WideCharToMultiByte(CP_UTF8, 0, szWchar, -1, strTemp, nLen, NULL, NULL)){ __leave; } if(S_OK != StringCchCopyA(szUtf8, nLen, strTemp)){ __leave; } res = true; } __finally{ if(NULL != strTemp){ delete []strTemp; strTemp = NULL; } } return res; } /* *@目的: 字符串转码,由 Ansi 转为 Utf-8 *@参数: const char * szAnsi 转码前 Ansi 字符串 *@参数: char* szUtf8 转码后 Utf-8 字符串 *@返回值: bool 成功返回true,失败返回false */ bool Ansi_to_Utf8(const char* szAnsi,char*& szUtf8) { bool res = false; size_t nLen = 0; wchar_t* strTemp = NULL; StringCchLengthA(szAnsi, MAX_COUNT, &nLen); __try{ if(NULL == (strTemp = new wchar_t[nLen+1])){ __leave; } if(false == Ansi_to_Wchar(szAnsi, strTemp)){ __leave; } if(false == Wchar_to_Utf8(strTemp, szUtf8)){ __leave; } res = true; } __finally{ if(NULL != strTemp){ delete []strTemp; strTemp = NULL; } } return res; } /* *@目的: 字符串转码,由 Utf-8 转为 Ansi *@参数: const char * szUtf8 转码前 Utf-8 字符串 *@参数: char* szAnsi 转码后 Ansi 字符串 *@返回值: bool 成功返回true,失败返回false */ bool Utf8_to_Ansi(const char* szUtf8,char*& szAnsi) { bool res = false; size_t nLen = 0; wchar_t* strTemp = NULL; 
StringCchLengthA(szUtf8, MAX_COUNT, &nLen); __try{ if(NULL == (strTemp = new wchar_t[nLen+1])){ __leave; } if(false == (Utf8_to_Wchar(szUtf8, strTemp))){ __leave; } if(false ==Wchar_to_Ansi(strTemp, szAnsi)){ __leave; } res = true; } __finally{ if(NULL != strTemp){ delete []strTemp; strTemp = NULL; } } return res; } public: CYkString(size_t len = 256){ Last_Style = Style_Char; IsAnsi = true; m_char = NULL; m_wchar = NULL; try{ m_char = new char[len]; m_wchar = new wchar_t[len]; ZeroMemory(m_char, len); ZeroMemory(m_wchar, len); } catch(std::bad_alloc){ if(m_char){ delete []m_char; } if(m_wchar){ delete []m_wchar; } throw; //此处抛出异常,停止构造函数,释放资源 } }; ~CYkString(void){ if(NULL != m_char){ delete []m_char; m_char = NULL; } if(NULL != m_wchar){ delete []m_wchar; m_wchar = NULL; } }; /* *@目的: 刷新字符串,是每种字符串储存内容一致 *@参数: 无 *@返回值: bool 成功返回 true 失败返回 false */ bool FlushString(){ if(Style_Char == Last_Style){ if(false == IsAnsi){ return Utf8_to_Wchar(m_char, m_wchar); }else{ return Ansi_to_Wchar(m_char, m_wchar); } } else if(Style_WChar == Last_Style){ if(false == IsAnsi){ return Wchar_to_Utf8(m_wchar, m_char); } else{ return Wchar_to_Ansi(m_wchar, m_char); } } } /* *@目的: 使用字符串的char*格式 *@参数: 无 *@返回值: char* char*字符串地址 */ char* asChar_str(){ FlushString(); Last_Style = Style_Char; return m_char; } /* *@目的: 使用字符串的wchar_t*格式 *@参数: 无 *@返回值: wchar_t* wchar_t*字符串地址 */ wchar_t* asWchar_str(){ FlushString(); Last_Style = Style_WChar; return m_wchar; } /* *@目的: 返回字符串的长度 *@参数: 无 *@返回值: size_t 字符串的长度 */ size_t GetLength(){ FlushString(); size_t nLen = 0; StringCchLengthW(m_wchar, MAX_COUNT, &nLen); return nLen; } /* *@目的: 判断字符串是否为空 *@参数: 无 *@返回值: bool 空则返回true,非空则返回false */ bool IsEmpty(){ FlushString(); return GetLength() ? 
false : true; } /* *@目的: 将字符串转换为Ansi编码形式 *@参数: 无 *@返回值: char* 转码后的字符串 */ char* ToAnsi(){ FlushString(); if(false == IsAnsi){ IsAnsi = true; if(false == Utf8_to_Ansi(m_char, m_char)){ m_char = NULL; } } return m_char; }; /* *@目的: 将字符串转换为Utf-8编码形式 *@参数: 无 *@返回值: char* 转码后的字符串 */ char* ToUtf_8(){ FlushString(); if(true == IsAnsi){ IsAnsi = false; if(false == Ansi_to_Utf8(m_char, m_char)){ m_char = NULL; } } return m_char; }; /* *@目的: 将Utf-8格式字符串用Urlcode编码 *@参数: 无 *@返回值: char* 编码后的字符串 */ char* UrlEncode_Utf8() { bool IsAnsiTemp = IsAnsi; ToUtf_8(); char* result = NULL; size_t nLen = 0; StringCchLengthA(m_char, MAX_COUNT, &nLen); size_t pos = 0; //结果字符串的长度标记 try{ result = new char[(nLen+1)*sizeof(wchar_t)]; ZeroMemory(result, sizeof(result)); }catch(std::bad_alloc){ delete []result; result = NULL; return result; } for(size_t i = 0; i < nLen; i++){ if(isalnum((BYTE)m_char[i]) || /*判断是否为字母或者数字 必须进行类型转换*/ ':' == m_char[i] || '/' == m_char[i] || '_' == m_char[i] || '.' == m_char[i] || '~' == m_char[i] || '?' == m_char[i] || '&' == m_char[i] || '=' == m_char[i] ){ result[pos++] = m_char[i]; //保持不变 } else if(' ' == m_char[i]){//如果是空格 result[pos++] = '+'; } else {//如果是其他字符 BYTE temp = Int_to_Hex((BYTE) m_char[i]); result[pos++] = '%'; result[pos++] = Int_to_Hex((BYTE)m_char[i] >> 4); result[pos++] = Int_to_Hex((BYTE)m_char[i] % 16); } } IsAnsi = IsAnsiTemp ? ToAnsi() : ToUtf_8(); //恢复原来的编码 result[pos++] = '\0'; return result; } /* *@目的: 将Ansi格式字符串用Urlcode编码 *@参数: 无 *@返回值: char* 编码后的字符串 */ char* UrlEncode_Ansi() { bool IsAnsiTemp = IsAnsi; ToAnsi(); char* result = NULL; size_t nLen = 0; StringCchLengthA(m_char, MAX_COUNT, &nLen); size_t pos = 0; //结果字符串的长度标记 try{ result = new char[(nLen+1)*sizeof(wchar_t)]; ZeroMemory(result, sizeof(result)); }catch(std::bad_alloc){ delete []result; result = NULL; return result; } for(size_t i = 0; i < nLen; i++){ if(isalnum((BYTE)m_char[i]) || /*判断是否为字母或者数字 必须进行类型转换*/ ':' == m_char[i] || '/' == m_char[i] || '_' == m_char[i] || '.' 
== m_char[i] || '~' == m_char[i] || '?' == m_char[i] || '&' == m_char[i] || '=' == m_char[i] ){ result[pos++] = m_char[i]; //保持不变 } else if(' ' == m_char[i]){//如果是空格 result[pos++] = '+'; } else {//如果是其他字符 BYTE temp = Int_to_Hex((BYTE) m_char[i]); result[pos++] = '%'; result[pos++] = Int_to_Hex((BYTE)m_char[i] >> 4); result[pos++] = Int_to_Hex((BYTE)m_char[i] % 16); } } IsAnsi = IsAnsiTemp ? ToAnsi() : ToUtf_8(); //恢复原来的编码 result[pos++] = '\0'; return result; } /* *@目的: 将字符串用Urlcode解码 *@参数: 无 *@返回值: char* 解码后的字符串(格式取决于字符串本身的格式) */ char* UrlDecode() { FlushString(); char* result = NULL; size_t nLen = 0; StringCchLengthA(m_char, MAX_COUNT, &nLen); size_t pos = 0; //结果字符串的长度标记 try{ result = new char[(nLen+1)*sizeof(wchar_t)]; ZeroMemory(result, sizeof(result)); }catch(std::bad_alloc){ delete []result; result = NULL; return result; } for(size_t i = 0; i < nLen; i++){ if('%' == m_char[i]){ //判断是否为汉字 BYTE cha; cha = Hex_to_Int(m_char[i+1])<<4; cha |= Hex_to_Int(m_char[i+2]); result[pos++] = (char)cha; i += 2; } else if('+' == m_char[i]){//如果是空格 result[pos++] = ' '; } else {//如果是其他字符 result[pos++] = m_char[i]; //保持不变 } } result[pos++] = '\0'; return result; } /* *@目的: 将字符串格式化(char*) *@参数: char* Format 格式化字符串 *@参数: ... 参数列表 *@返回值: int 格式化参数的个数, -1表示失败 */ int Format(char* Format, ...) { Last_Style = Style_Char; va_list argList = NULL; size_t nLen = 0; va_start(argList, Format); nLen = _vsprintf_p(NULL, 0, Format, argList); try{ m_char = new char[nLen+1]; _vsprintf_p(m_char, nLen + 1, Format, argList); return FlushString(); } catch(std::bad_alloc){ delete []m_char; m_char = NULL; return -1; } } /* *@目的: 将字符串格式化(wchar_t*) *@参数: wchar_t* Format 格式化字符串 *@参数: ... 参数列表 *@返回值: int 格式化参数的个数, -1表示失败 */ int Format(wchar_t* Format, ...) 
{ Last_Style = Style_WChar; va_list argList = NULL; size_t nLen = 0; va_start(argList, Format); nLen = _vswprintf_p(NULL, 0, Format, argList); try{ m_wchar = new wchar_t[nLen+1]; _vswprintf_p(m_wchar, nLen + 1, Format, argList); return FlushString(); } catch(std::bad_alloc){ delete []m_wchar; m_wchar = NULL; return -1; } } public: //==运算符重载 bool operator == (const char* obj){ return 0 == strncmp(m_char, obj, MAX_COUNT) ? true : false; } bool operator == (const wchar_t* obj){ return CSTR_EQUAL == CompareStringOrdinal(m_wchar, -1, obj, -1, false) ? true : false; } bool operator == (CYkString &obj){ return CSTR_EQUAL == CompareStringOrdinal(m_wchar, -1, obj.asWchar_str(), -1, false) ? true : false; } //=运算符重载 CYkString& operator = (const char* obj) { Last_Style = Style_Char; if(*this == obj){ return *this; } size_t nLen1 = GetLength()*sizeof(wchar_t); size_t nLen2 = 0; StringCchLengthA(obj, MAX_COUNT, &nLen2); nLen2 += 1; if(nLen1 >= nLen2){ StringCchCopyA(m_char, nLen1, obj); } else{ char *strTemp = NULL; __try{ if(NULL == (strTemp = new char[nLen2])){ __leave; } if(S_OK != StringCchCopyA(strTemp, nLen2, obj)){ __leave; } delete m_char; m_char = NULL; if(NULL == (m_char = new char[nLen2])){ __leave; } if(S_OK != StringCchCopyA(m_char, nLen2, strTemp)){ __leave; } } __finally{ if(NULL != strTemp){ delete []strTemp; strTemp = NULL; FlushString(); } } } return *this; } CYkString& operator = (const wchar_t* obj) { Last_Style = Style_WChar; if(*this == obj){ return *this; } size_t nLen1 = GetLength()*sizeof(wchar_t); size_t nLen2 = 0; StringCchLengthW(obj, MAX_COUNT, &nLen2); nLen2 *= sizeof(wchar_t); if(nLen1 >= nLen2){ StringCchCopyW(m_wchar, nLen1, obj); } else{ wchar_t *strTemp = NULL; __try{ if(NULL == (strTemp = new wchar_t[nLen2])){ __leave; } if(S_OK != StringCchCopyW(strTemp, nLen2, obj)){ __leave; } delete []m_wchar; m_wchar = NULL; if(NULL == (m_wchar = new wchar_t[nLen2])){ __leave; } if(S_OK != StringCchCopyW(m_wchar, nLen2, strTemp)){ __leave; } } __finally{ 
if(NULL != strTemp){ delete []strTemp; strTemp = NULL; FlushString(); } } } return *this; } CYkString& operator = (CYkString &obj) { return *this = obj.asWchar_str(); } //+=运算符重载 CYkString& operator += (const char* obj) { Last_Style = Style_Char; size_t nLen1 = GetLength()*sizeof(wchar_t) + 1; size_t nLen2 = 0; StringCchLengthA(obj, MAX_COUNT, &nLen2); nLen2 += 1; char *strTemp = NULL; __try{ if(NULL == (strTemp = new char[nLen1])){ __leave; } if(S_OK != StringCchCopyA(strTemp, nLen1, m_char)){ __leave; } delete []m_wchar; m_wchar = NULL; if(NULL == (m_char = new char[nLen1+nLen2])){ __leave; } if(S_OK != StringCchCopyA(m_char, nLen1, strTemp)){ __leave; } if(S_OK != StringCchCatA(m_char, nLen1+nLen2, obj)){ __leave; } } __finally{ if(NULL != strTemp){ delete []strTemp; strTemp = NULL; FlushString(); } } return *this; } CYkString& operator += (const wchar_t* obj) { Last_Style = Style_WChar; size_t nLen1 = GetLength()*sizeof(wchar_t); size_t nLen2 = 0; StringCchLengthW(obj, MAX_COUNT, &nLen2); nLen2 *= sizeof(wchar_t); wchar_t *strTemp = NULL; __try{ if(NULL == (strTemp = new wchar_t[nLen1])){ __leave; } if(S_OK != StringCchCopyW(strTemp, nLen1, m_wchar)){ __leave; } delete []m_wchar; m_wchar = NULL; if(NULL == (m_wchar = new wchar_t[nLen1+nLen2])){ __leave; } if(S_OK != StringCchCopyW(m_wchar, nLen1, strTemp)){ __leave; } if(S_OK != StringCchCatW(m_wchar, nLen1+nLen2, obj)){ __leave; } } __finally{ if(NULL != strTemp){ delete []strTemp; strTemp = NULL; FlushString(); } } return *this; } CYkString& operator += (CYkString &obj) { return *this += obj.asWchar_str(); } protected: char* m_char; //内部字符串 char* 窄字节类型 wchar_t* m_wchar; //内部字符串 wchar* 宽字节类型 private: enum LASTCHANGE{ Style_Char = 1, Style_WChar = 2 };//最后改变的数据类型 bool IsAnsi; //字符串是否是Ansi编码格式 int Last_Style; //最后改变的数据类型标记 }; #endif // !__YKSTRING