Unicode的编码形式与对应的字符串相互转换
/**
 * Converts between ordinary text and its escaped form, in which every UTF-16 code unit is
 * written as a backslash, the letter {@code u} and four lowercase hexadecimal digits.
 *
 * @author 白乾涛
 */
public class UnicodeUtils {

    /** Runs the two demos and then decodes a JSON-like payload that contains escapes. */
    public static void main(String[] args) throws UnsupportedEncodingException {
        test1();
        test2();
        // A server may well answer with a payload shaped like this one. Inside a Java string
        // literal every quote and every backslash needs a backslash in front of it.
        String unicodeMessages = "{\"code\":200,\"message\":\"\\u4fdd\\u5b58\\u6210\\u529f\"}";
        // Prints the same JSON with the four escapes replaced by the characters they encode.
        System.out.println(unicodeToString(unicodeMessages) + "\n");
    }

    /** Demo: encodes and decodes a single character. */
    private static void test1() {
        // Prints the escaped form of the character (6c49 in hex).
        System.out.println("【汉】的Unicode编码为【" + toUnicodeString('汉') + "】");
        // The escape inside the argument literal is resolved by the compiler, so the method
        // receives the character itself and returns it unchanged.
        System.out.println("【\\u6c49】对应的字符为【" + unicodeToString("\u6c49") + "】");
        // Also resolved at compile time: prints the character, not the escape.
        System.out.println("\u6c49");
        // Prints 28671.
        System.out.println(0x9FFF - 0x3000 + "\n");
    }

    /** Demo: encodes a mixed ASCII/CJK string, decodes it again and prints the escaped text. */
    private static void test2() {
        String string = "aA1:中国";
        String unicode = toUnicodeString(string);
        // Every character is escaped, the ASCII ones included (0061 0041 0031 003a 4e2d 56fd).
        System.out.println("【" + string + "】的Unicode编码为【" + unicode + "】");
        // Prints the original text again.
        System.out.println(unicodeToString(unicode));
        for (int i = 0; i < unicode.length(); i += 6) {
            // Prints the escape text as-is. Escapes are only translated by the compiler while it
            // reads source code; a string assembled at run time just holds six ordinary
            // characters per escape, and printing does not interpret them.
            System.out.print(unicode.substring(i, i + 6));
        }
        // For the same reason this prints the escaped text, not the decoded characters.
        System.out.println("\n" + unicode);
    }

    /**
     * Escapes one character.
     *
     * <p>A Java {@code char} is a 16-bit UTF-16 code unit, so its value always fits in four
     * hexadecimal digits (2^16 = 16^4). Characters outside the Basic Multilingual Plane occupy
     * two {@code char}s and therefore produce two escapes.
     *
     * @param c the character to escape
     * @return a backslash, the letter {@code u} and the four-digit, zero-padded, lowercase hex value
     */
    public static String toUnicodeString(char c) {
        StringBuilder sb = new StringBuilder(6);
        String hexString = Integer.toHexString(c);
        sb.append("\\u"); // marks the start of an escape
        for (int j = hexString.length(); j < 4; j++) {
            sb.append('0'); // left-pad to four digits
        }
        sb.append(hexString);
        return sb.toString();
    }

    /**
     * Escapes every character of a string, ASCII included.
     *
     * @param string the text to escape; must not be {@code null}
     * @return the concatenated escapes, six characters per input {@code char}
     */
    public static String toUnicodeString(String string) {
        StringBuilder sb = new StringBuilder(string.length() * 6);
        for (int i = 0; i < string.length(); i++) {
            sb.append(toUnicodeString(string.charAt(i)));
        }
        return sb.toString();
    }

    /**
     * Replaces every well-formed escape in the input with the character it encodes.
     *
     * <p>Text that is not part of an escape is copied through unchanged. A marker that is not
     * followed by four hexadecimal digits (because the input ends early or the digits are
     * invalid) is also copied through unchanged, so arbitrary input such as a Windows path never
     * makes this method fail or hang.
     *
     * @param uString text that may contain escapes; must not be {@code null}
     * @return the decoded text
     */
    public static String unicodeToString(String uString) {
        StringBuilder sb = new StringBuilder(uString.length());
        int pos = 0;
        int i;
        while ((i = uString.indexOf("\\u", pos)) != -1) {
            sb.append(uString, pos, i);
            int code = parseHexQuad(uString, i + 2);
            if (code >= 0) {
                sb.append((char) code);
                pos = i + 6;
            } else {
                // Not a complete escape: keep the marker literally and resume right after it.
                // Always moving pos forward is what guarantees the loop terminates.
                sb.append("\\u");
                pos = i + 2;
            }
        }
        sb.append(uString, pos, uString.length());
        return sb.toString();
    }

    /**
     * Reads four hexadecimal digits starting at {@code start}.
     *
     * @return the value 0..0xFFFF, or -1 when fewer than four characters remain or any of them
     *     is not a hexadecimal digit
     */
    private static int parseHexQuad(String text, int start) {
        if (start + 4 > text.length()) {
            return -1;
        }
        int value = 0;
        for (int k = start; k < start + 4; k++) {
            int digit = Character.digit(text.charAt(k), 16);
            if (digit < 0) {
                return -1;
            }
            value = (value << 4) | digit;
        }
        return value;
    }
}
字符串编码格式转换
/**
 * Helpers that rewrite a Java {@code String} between plain text and an escaped form in which a
 * character is written as a backslash, the letter {@code u} and four lowercase hex digits.
 *
 * <p>The method names mention GBK and UTF-8, but nothing in this class touches bytes or
 * charsets: every method works on the UTF-16 {@code char}s of the string it is given. The
 * encoders do not escape backslashes, so input that already contains escape-looking text is
 * decoded by the matching decoder.
 *
 * @author 白乾涛
 */
public class CodeUtils {

    /** Message carried by every exception raised for a broken escape. */
    private static final String MALFORMED = "Malformed \\uxxxx encoding.";

    /** First and last full-width variants of printable ASCII ('!' through '~'). */
    private static final int FULLWIDTH_FIRST = 0xFF01;
    private static final int FULLWIDTH_LAST = 0xFF5E;

    /** Distance between a full-width ASCII variant and its ASCII counterpart (0xFEE0). */
    private static final int FULLWIDTH_TO_ASCII_OFFSET = 65248;

    /** Prints the result of every conversion for one sample string. */
    public static void main(String[] args) {
        String string = "aA.1字符串编码格式转换";
        System.out.println(gbk2Unicode(string));
        System.out.println(gbk2utf8(string));
        System.out.println(utf2gbk(string));
        System.out.println(utf2Unicode(string));
        System.out.println(unicode2GBK(string));
        System.out.println(unicode2Utf8(string));
    }

    /**
     * Runs {@link #gbk2Unicode(String)} and feeds the result to {@link #unicode2Utf8(String)}.
     *
     * @param gbk the text to convert; must not be {@code null}
     * @return the text after the escape/unescape round trip
     * @throws NumberFormatException if the input itself contains a broken escape
     */
    public static String gbk2utf8(String gbk) {
        String escaped = gbk2Unicode(gbk);
        return unicode2Utf8(escaped);
    }

    /**
     * Runs {@link #utf2Unicode(String)} and feeds the result to {@link #unicode2GBK(String)}.
     *
     * @param utf the text to convert; must not be {@code null}
     * @return the text after the escape/unescape round trip
     * @throws NumberFormatException if the input itself contains a broken escape
     */
    public static String utf2gbk(String utf) {
        String escaped = utf2Unicode(utf);
        return unicode2GBK(escaped);
    }

    /**
     * Escapes every character above U+00FF; characters up to U+00FF are copied unchanged.
     *
     * @param str the text to escape; must not be {@code null}
     * @return the text with each character above U+00FF replaced by a four-digit escape
     */
    public static String gbk2Unicode(String str) {
        StringBuilder result = new StringBuilder(str.length());
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            if (c <= 0x00FF) {
                result.append(c);
            } else {
                appendEscape(result, c);
            }
        }
        return result.toString();
    }

    /**
     * Decodes escapes; every other character, including a backslash that does not start an
     * escape, is copied unchanged.
     *
     * @param dataStr text that may contain escapes; must not be {@code null}
     * @return the decoded text
     * @throws NumberFormatException if a backslash-u marker is not followed by four ASCII
     *     hexadecimal digits
     */
    public static String unicode2GBK(String dataStr) {
        int length = dataStr.length();
        StringBuilder buffer = new StringBuilder(length);
        int index = 0;
        while (index < length) {
            boolean startsEscape = index + 1 < length
                    && dataStr.charAt(index) == '\\'
                    && dataStr.charAt(index + 1) == 'u';
            if (startsEscape) {
                buffer.append(readHexQuad(dataStr, index + 2));
                index += 6;
            } else {
                buffer.append(dataStr.charAt(index));
                index++;
            }
        }
        return buffer.toString();
    }

    /**
     * Escapes every character outside 7-bit ASCII, except that the full-width variants of
     * printable ASCII (U+FF01 to U+FF5E) are folded to their ASCII counterparts instead.
     *
     * @param inStr the text to escape; must not be {@code null}
     * @return ASCII-only text in which each remaining non-ASCII character is a four-digit escape
     */
    public static String utf2Unicode(String inStr) {
        StringBuilder sb = new StringBuilder(inStr.length());
        for (int i = 0; i < inStr.length(); i++) {
            char c = inStr.charAt(i);
            if (c <= 0x7F) {
                // U+0000..U+007F is exactly the Basic Latin block.
                sb.append(c);
            } else if (c >= FULLWIDTH_FIRST && c <= FULLWIDTH_LAST) {
                // Only this sub-range has an ASCII twin; the rest of the half/full-width block
                // (half-width katakana, Hangul, symbols) would turn into unrelated characters.
                sb.append((char) (c - FULLWIDTH_TO_ASCII_OFFSET));
            } else {
                appendEscape(sb, c);
            }
        }
        return sb.toString();
    }

    /**
     * Decodes escapes and the single-character escapes for tab, carriage return, newline and
     * form feed. A backslash followed by any other character yields that character alone; a
     * backslash that is the very last character is kept.
     *
     * @param theString text that may contain escapes; must not be {@code null}
     * @return the decoded text
     * @throws NumberFormatException (a kind of {@link IllegalArgumentException}) if a
     *     backslash-u marker is not followed by four ASCII hexadecimal digits
     */
    public static String unicode2Utf8(String theString) {
        int len = theString.length();
        StringBuilder outBuffer = new StringBuilder(len);
        int x = 0;
        while (x < len) {
            char current = theString.charAt(x++);
            if (current != '\\') {
                outBuffer.append(current);
                continue;
            }
            if (x == len) {
                // Nothing follows the backslash, so there is nothing to unescape.
                outBuffer.append(current);
                break;
            }
            char escaped = theString.charAt(x++);
            switch (escaped) {
                case 'u':
                    outBuffer.append(readHexQuad(theString, x));
                    x += 4;
                    break;
                case 't':
                    outBuffer.append('\t');
                    break;
                case 'r':
                    outBuffer.append('\r');
                    break;
                case 'n':
                    outBuffer.append('\n');
                    break;
                case 'f':
                    outBuffer.append('\f');
                    break;
                default:
                    outBuffer.append(escaped);
                    break;
            }
        }
        return outBuffer.toString();
    }

    /** Appends the escape for {@code c}: the marker plus four zero-padded lowercase hex digits. */
    private static void appendEscape(StringBuilder out, char c) {
        // A char widens to int without sign extension, so this yields at most four digits.
        String hex = Integer.toHexString(c);
        out.append("\\u");
        for (int pad = hex.length(); pad < 4; pad++) {
            out.append('0');
        }
        out.append(hex);
    }

    /**
     * Reads the four hex digits of an escape starting at {@code start}.
     *
     * @throws NumberFormatException if fewer than four characters remain or one is not an ASCII
     *     hexadecimal digit
     */
    private static char readHexQuad(String text, int start) {
        if (start + 4 > text.length()) {
            throw new NumberFormatException(MALFORMED);
        }
        int value = 0;
        for (int k = start; k < start + 4; k++) {
            int digit = hexValue(text.charAt(k));
            if (digit < 0) {
                throw new NumberFormatException(MALFORMED);
            }
            value = (value << 4) | digit;
        }
        return (char) value;
    }

    /** Returns the value of an ASCII hexadecimal digit, or -1 for any other character. */
    private static int hexValue(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }
}