繁简体(GB<=>Big5)中文字符的转化实现原理很简单,就是根据两种码表的编码规则,创建两者之间的字符对应关系表,通过程序读取这个映射表来自动查出另一种编码方式下对应字符的字节编码,从而进行逐字节的内容替换。
主功能实现的GB2Big5.java源代码如下:
package zeal.util; import java.io.*; /** * 用来处理GB2312/BIG5码字符互相转换的类. * 需要两个码表文件:/zeal/util/gb-big5.table,/zeal/util/big5-gb.table. * 这两个码表可以根据具体情况补充映射不正确的码. * <p>Title: GB<=>Big5</p> * <p>Description: Deal with the convertion between gb2312 and big5 charset Strings.</p> * <p>Copyright: Copyright (c) 2004</p> * <p>Company: NewmenBase</p> * @author Zeal Li * @version 1.0 * * @see zeal.util.StreamConverter */ public class GB2Big5{ private static GB2Big5 pInstance = null; private String s_big5TableFile = null; private String s_gbTableFile = null; private byte[] b_big5Table = null; private byte[] b_gbTable = null; /** 指定两个码表文件来进行初始化 */ private GB2Big5(String sgbTableFile,String sbig5TableFile) throws NullPointerException{ s_big5TableFile = sbig5TableFile; s_gbTableFile = sgbTableFile; if(null == b_gbTable){ b_gbTable = getBytesFromFile(sgbTableFile); } if(null == b_big5Table){ b_big5Table = getBytesFromFile(sbig5TableFile); } if(null == b_gbTable){ throw new NullPointerException("No gb table can be load"); } if(null == b_big5Table){ throw new NullPointerException("No big5 table can be load"); } } public static synchronized GB2Big5 getInstance(){ //return getInstance("d:\\gb-big5.table","d:\\big5-gb.table"); return getInstance("/zeal/util/gb-big5.table", "/zeal/util/big5-gb.table"); } public static synchronized GB2Big5 getInstance(String sgbTableFile, String sbig5TableFile){ if(null == pInstance){ try{ pInstance = new GB2Big5(sgbTableFile,sbig5TableFile); } catch(Exception e){ System.err.println(e.toString()); pInstance = null; } } return pInstance; } /** 把gbChar对应的big5字符替换掉,用来更新码表文件. * 一般当发现字符映射不正确的时候可以通过这个方法来校正. 
*/ protected synchronized void resetBig5Char(String gbChar,String big5Char) throws Exception{ byte[] Text = new String(gbChar.getBytes(),"GBK").getBytes("GBK"); byte[] TextBig5 = new String(big5Char.getBytes(), "BIG5").getBytes("BIG5"); int max = Text.length - 1; int h = 0; int l = 0; int p = 0; int b = 256; byte[] big = new byte[2]; for(int i = 0; i < max; i++){ h = (int) (Text[i]); if(h < 0){ h = b + h; l = (int) (Text[i + 1]); if(l < 0){ l = b + (int) (Text[i + 1]); } if(h == 161 && l == 64){ ; // do nothing } else{ p = (h - 160) * 510 + (l - 1) * 2; b_gbTable[p] = TextBig5[i]; b_gbTable[p + 1] = TextBig5[i + 1]; } i++; } } BufferedOutputStream pWriter = new BufferedOutputStream(new FileOutputStream(s_gbTableFile)); pWriter.write(b_gbTable,0,b_gbTable.length); pWriter.close(); } /** 把big5Char对应的gb字符替换掉,用来更新码表文件. * 一般当发现字符映射不正确的时候可以通过这个方法来校正. */ protected synchronized void resetGbChar(String big5Char,String gbChar) throws Exception{ byte[] TextGb = new String(gbChar.getBytes(),"GBK").getBytes("GBK"); byte[] Text = new String(big5Char.getBytes(),"BIG5").getBytes("BIG5"); int max = Text.length - 1; int h = 0; int l = 0; int p = 0; int b = 256; byte[] big = new byte[2]; for(int i = 0; i < max; i++){ h = (int) (Text[i]); if(h < 0){ h = b + h; l = (int) (Text[i + 1]); if(l < 0){ l = b + (int) (Text[i + 1]); } if(h == 161 && l == 64){ ; // do nothing } else{ p = (h - 160) * 510 + (l - 1) * 2; b_big5Table[p] = TextGb[i]; b_big5Table[p + 1] = TextGb[i + 1]; } i++; } } BufferedOutputStream pWriter = new BufferedOutputStream(new FileOutputStream(s_big5TableFile)); pWriter.write(b_big5Table,0,b_big5Table.length); pWriter.close(); } /** 把gb2312编码的字符串转化成big5码的字节流 */ public byte[] gb2big5(String inStr) throws Exception{ if(null == inStr || inStr.length() <= 0){ return "".getBytes(); //return ""; } byte[] Text = new String(inStr.getBytes(),"GBK").getBytes("GBK"); int max = Text.length - 1; int h = 0; int l = 0; int p = 0; int b = 256; byte[] big = new byte[2]; for(int i = 0; i 
< max; i++){ h = (int) (Text[i]); if(h < 0){ h = b + h; l = (int) (Text[i + 1]); if(l < 0){ l = b + (int) (Text[i + 1]); } if(h == 161 && l == 64){ big[0] = big[1] = (byte) (161 - b); } else{ p = (h - 160) * 510 + (l - 1) * 2; try{ big[0] = (byte) (b_gbTable[p] - b); } catch(Exception e){ big[0] = 45; } try{ big[1] = (byte) (b_gbTable[p + 1] - b); } catch(Exception e){ big[1] = 45; } } Text[i] = big[0]; Text[i + 1] = big[1]; i++; } } return Text; //return new String(Text); } /** 把big5码的字符串转化成gb2312码的字符串 */ public String big52gb(String inStr) throws Exception{ if(null == inStr || inStr.length() <= 0){ return ""; } byte[] Text = new String(inStr.getBytes(),"BIG5").getBytes("BIG5"); int max = Text.length - 1; int h = 0; int l = 0; int p = 0; int b = 256; byte[] big = new byte[2]; for(int i = 0; i < max; i++){ h = (int) (Text[i]); if(h < 0){ h = b + h; l = (int) (Text[i + 1]); if(l < 0){ l = b + (int) (Text[i + 1]); } if(h == 161 && l == 161){ big[0] = (byte) (161 - b); big[1] = (byte) (64 - b); } else{ p = (h - 160) * 510 + (l - 1) * 2; try{ big[0] = (byte) (b_big5Table[p] - b); } catch(Exception e){ big[0] = 45; } try{ big[1] = (byte) (b_big5Table[p + 1] - b); } catch(Exception e){ big[1] = 45; } } Text[i] = big[0]; Text[i + 1] = big[1]; i++; } } return new String(Text); } /** 把文件读入字节数组,读取失败则返回null */ private static byte[] getBytesFromFile(String inFileName){ try{ InputStream in = GB2Big5.class.getResourceAsStream(inFileName); byte[] sContent = StreamConverter.toByteArray(in); in.close(); return sContent; /* java.io.RandomAccessFile inStream = new java.io.RandomAccessFile( inFileName,"r"); byte[] sContent = new byte[ (int) (inStream.length())]; inStream.read(sContent); inStream.close(); return sContent; */ } catch(Exception e){ e.printStackTrace(); return null; } } public static void main(String[] args) throws Exception{ if(args.length < 2){ System.out.println( "Usage: zeal.util.GB2Big5 [-gb | -big5] inputstring"); System.exit(1); return; } boolean bIsGB = true; 
String inStr = ""; for(int i = 0; i < args.length; i++){ if(args[i].equalsIgnoreCase("-gb")){ bIsGB = true; } else if(args[i].equalsIgnoreCase("-big5")){ bIsGB = false; } else{ inStr = args[i]; } } GB2Big5 pTmp = GB2Big5.getInstance(); String outStr = ""; if(bIsGB){ outStr = pTmp.big52gb(inStr); } else{ outStr = new String(pTmp.gb2big5(inStr),"BIG5"); } System.out.println("String [" + inStr + "] converted into:\n[" + outStr + "]"); } }
使用示例:
如果下载发布形式的zip类库包,可以在命令行下输入
java -classpath GB2Big5.zip zeal.util.GB2Big5 -big5 中国
可以看到繁体转化的输出情况。
或者自己写一个测试的class如下:
import zeal.util.*; public class MyTest{ public static void main(String[] args) throws Exception{ if(args.length < 2){ System.out.println( "Usage: MyTest [-gb | -big5] inputstring"); System.exit(1); return; } boolean bIsGB = true; String inStr = ""; for(int i = 0; i < args.length; i++){ if(args[i].equalsIgnoreCase("-gb")){ bIsGB = true; } else if(args[i].equalsIgnoreCase("-big5")){ bIsGB = false; } else{ inStr = args[i]; } } // 得到一个繁简体字符处理的实例 GB2Big5 pTmp = GB2Big5.getInstance(); String outStr = ""; if(bIsGB){ // 如果需要把big5码的字符转化成gb2312的,就使用big52gb()方法。 // 传入字符串参数,传出的也是字符串。 outStr = pTmp.big52gb(inStr); } else{ // 如果需要把gb2312码的字符转化成big5的,就使用gb2big5()方法。 // 传入的是字符串参数,传出的是字节数组(因为有可能需要把big5码的内容 // 写入文件,就必须用字节数组的方式写入,否则经过字节->字符串的转化之后 // 再写入文件就变成乱码了)。如果需要直接显示出来,就new一个BIG5的字符 // 串就行了。 outStr = new String(pTmp.gb2big5(inStr),"BIG5"); } System.out.println( "String [" + inStr + "] converted into:\n[" + outStr + "]"); } }
############################## 直接调用GB2Big5只适用于少量字符的转化,当需要对整个jsp页面根据用户需要进行编码转化的时候,就需要使用到taglib的功能。 具体配置使用步骤如下:
1.在WEB-INF/目录下增加GB2Big5Wrapper.tld文件,内容如下:
<?xml version="1.0" encoding="ISO-8859-1"?>
<!DOCTYPE taglib PUBLIC "-//Sun Microsystems, Inc.//DTD JSP Tag Library 1.2//EN" "http://java.sun.com/dtd/web-jsptaglibrary_1_2.dtd">
<taglib>
  <tlib-version>1.0</tlib-version>
  <jsp-version>1.2</jsp-version>
  <short-name>zealLi</short-name>
  <tag>
    <name>GB2Big5Wrapper</name>
    <tag-class>zeal.util.GB2Big5Wrapper</tag-class>
    <attribute>
      <name>isbig5</name>
      <rtexprvalue>true</rtexprvalue>
      <type>boolean</type>
    </attribute>
  </tag>
</taglib>
2.在需要进行转化的JSP页面里面加上:
<%@ taglib uri="/WEB-INF/GB2Big5Wrapper.tld" prefix="zealLi"%>
<zealLi:GB2Big5Wrapper isbig5="true"> 任何你需要转化的东西 </zealLi:GB2Big5Wrapper>
比如test.jsp源代码如下 => <%@ page contentType="text/html; charset=GBK" import="javax.servlet.http.HttpSession" import="java.util.*" import="com.zealLi.*" %><% String encode = request.getParameter("encode"); if(null == encode || encode.length() <= 0){ encode = "BIG5"; } boolean isBig5 = false; String charset = "GB2312"; if(encode.equalsIgnoreCase("BIG5")){ isBig5 = true; charset = "BIG5"; } String sInfo = "中文字体繁简体转化的测试。"; %><%@ taglib uri="/WEB-INF/GB2Big5Wrapper.tld" prefix="zealLi"%> <zealLi:GB2Big5Wrapper isbig5="<%= isBig5 %>"> <html> <head> <title>Jsp测试页面</title> <meta http-equiv="Content-Type" content="text/html; charset=<%=charset%>"> </head> <body> <% Calendar now = Calendar.getInstance(); out.println(now.get(Calendar.YEAR) + "." + (now.get(Calendar.MONTH)+1) + "." + now.get(Calendar.DAY_OF_MONTH) + "<p>"); %> <p> <%=sInfo%> </body> </html></zealLi:GB2Big5Wrapper>
附件 GB2Big5.zip(108,182 bytes): 发布形式的类库zip文件,可直接使用
附件 GB2Big5_Project.zip(38,685 bytes): JBuilder工程文件,提供完整的源代码
From: http://www.zeali.net/blog/entry.php?id=19
本文转自 RubyPdf 的中文博客(博客园),原文链接:http://www.cnblogs.com/hardrock/archive/2006/02/10/328315.html,如需转载请自行联系原作者。 http://www.cnblogs.com/hardrock/archive/2006/05/17/402654.html