|
马上注册,结交更多好友,享用更多功能,让你轻松玩转社区。
您需要 登录 才可以下载或查看,没有账号?注册
x
发信人: intranetworm (小虫), 信区: Java
标 题: Unicode 汉字与GB内码的转换
发信站: BBS 水木清华站 (Wed Aug 27 13:44:45 1997)
这是我编的一个转换程序,使用时先将前面的码表存成文件,例如table.txt
创建一个GBUnicode实例,new GBUnicode("table.txt")
以后调用GB2Uni和Uni2GB作内码转换.注意GB内码是用两个字节表示的.
- import java.io.*;
- import java.util.Hashtable;
/**
 * Converts between Unicode characters and two-byte GB Chinese codes using a
 * mapping table loaded from a file.
 *
 * <p>The table file must contain exactly {@code 6763} entries. Each entry is:
 * optional leading bytes with value {@code <= ' '}, the two raw GB bytes of
 * the character, any bytes up to and including a {@code ':'}, optional blanks,
 * and the Unicode code point written in decimal. Entries must appear in the
 * same order that {@link #getGBindex(byte, byte)} numbers them, because
 * {@link #GB2Uni(byte, byte)} looks the result up by position.
 *
 * <p>NOTE(review): the row/column layout handled here (rows 0xB0-0xF7, columns
 * 0xA1-0xFE, row 0xD7 ending at 0xF9) looks like the GB 2312 level-1/level-2
 * Chinese area - confirm against the table you load.
 */
class GBUnicode{
    /** Number of characters the table must define. */
    private static final int TABLE_SIZE = 6763;
    /** Columns in a full row (low bytes 0xA1..0xFE). */
    private static final int ROW_WIDTH = 94;
    /** Row offset (high byte 0xD7) of the short row that ends level 1. */
    private static final int LAST_L1_ROW = 39;
    /** Number of characters in the short row (low bytes 0xA1..0xF9). */
    private static final int LAST_L1_ROW_WIDTH = 89;
    /** Row offset of the last level-2 row (high byte 0xF7). */
    private static final int LAST_ROW = 71;

    /** GB high/low bytes of entry n, in table-file order. */
    byte high[]=new byte[TABLE_SIZE],low[]=new byte[TABLE_SIZE];
    /** Unicode character of entry n, in table-file order. */
    char unichar[]=new char[TABLE_SIZE];
    /** Maps a Character (Unicode) to the Integer index of its table entry. */
    Hashtable UniGB;

    /**
     * Loads the conversion table.
     *
     * @param table_file path of the table file described in the class comment
     * @throws IOException if the file cannot be read, or (as EOFException) if
     *         it ends before all 6763 entries have been read
     * @throws NumberFormatException if an entry has no decimal digits after
     *         the ':' or its value does not fit in a char
     */
    public GBUnicode(String table_file)throws IOException
    {
        UniGB=new Hashtable(7000,1);
        InputStream tables=new BufferedInputStream(new FileInputStream(table_file));
        try {
            for (int n=0; n<TABLE_SIZE; n++) {
                // Skip blanks/line breaks; GB bytes are >= 0x80 so they never match.
                int b=readRequired(tables);
                while (b<=' ')
                    b=readRequired(tables);
                high[n]=(byte)b;
                low[n]=(byte)readRequired(tables);

                // Skip everything up to and including the ':' separator.
                do {
                    b=readRequired(tables);
                } while (b!=':');
                // Skip blanks before the number.
                do {
                    b=readRequired(tables);
                } while (b<=' ');

                // Accumulate the decimal code point. The byte that terminates
                // the digit run is consumed and dropped; end of file is a
                // legal terminator for the final entry.
                int code=0;
                boolean sawDigit=false;
                while (b>='0' && b<='9') {
                    code=code*10+(b-'0');
                    if (code>0xFFFF)
                        throw new NumberFormatException(
                            "Unicode value out of range in entry "+n);
                    sawDigit=true;
                    b=tables.read();
                }
                if (!sawDigit)
                    throw new NumberFormatException(
                        "Missing Unicode value in entry "+n);
                unichar[n]=(char)code;

                Character key=Character.valueOf(unichar[n]);
                if (UniGB.get(key)!= null)
                    System.out.println("Duplicated : "+unichar[n]);
                UniGB.put(key,Integer.valueOf(n));
            }
        } finally {
            // Release the file even when parsing fails part-way through.
            tables.close();
        }
    }

    /** Reads one byte (0..255), failing with EOFException instead of returning -1. */
    private static int readRequired(InputStream in) throws IOException {
        int b=in.read();
        if (b<0)
            throw new EOFException("Unexpected end of GB/Unicode table");
        return b;
    }

    /**
     * Computes the table index of a GB code.
     *
     * @param high first byte of the GB code
     * @param low second byte of the GB code
     * @return index in 0..6762, or -1 if the pair is outside the supported area
     */
    private int getGBindex(byte high,byte low){
        // Work on unsigned values so bytes below 0xB0/0xA1 and ASCII bytes
        // come out as plainly out-of-range offsets.
        int i=(high&0xff)-0xb0;
        int j=(low&0xff)-0xa1;
        if (i<0 || i>LAST_ROW || j<0)
            return -1;
        if (i<LAST_L1_ROW) {// L1 Chinese
            if (j>=ROW_WIDTH)
                return -1;
            return (i*ROW_WIDTH+j);
        }
        else if (i==LAST_L1_ROW) {// the last 89 L1 Chinese
            if (j>=LAST_L1_ROW_WIDTH)
                return -1;
            return (i*ROW_WIDTH+j);
        }
        else {// L2 Chinese
            if (j>=ROW_WIDTH)
                return -1;
            // The short L1 row leaves 5 unused slots that L2 indexes skip.
            return (i*ROW_WIDTH+j-(ROW_WIDTH-LAST_L1_ROW_WIDTH));
        }
    }

    /**
     * Converts a Unicode character to its GB code.
     *
     * @param unicode character to convert
     * @return a new two-element array {high, low}, or null if the character
     *         is not in the table
     */
    public byte[] Uni2GB(char unicode) {
        Integer index=(Integer)UniGB.get(Character.valueOf(unicode));
        if (index==null)
            return null;
        byte ch[]=new byte[2];
        ch[0]=high[index.intValue()];
        ch[1]=low[index.intValue()];
        return ch;
    }

    /**
     * Converts a GB code to its Unicode character.
     *
     * @param high first byte of the GB code
     * @param low second byte of the GB code
     * @return the Unicode character, or 0 if the pair is not a supported
     *         GB Chinese code
     */
    public char GB2Uni(byte high, byte low) {
        int index=getGBindex(high,low);
        if (index ==-1) //not GB Chinese
            return 0;
        return(unichar[index]);
    }
}
复制代码
--
※ 来源:·BBS 水木清华站 bbs.net.tsinghua.edu.cn·[FROM: organ.ncic.ac.c] |
|