从GB2312到Unicode转换表制作程式

文章作者 100test 发表时间 2007:05:12 12:20:27
来源 100Test.Com百考试题网


简体中文有两种常用编码:GB2312 和 Unicode。它们之间的对应关系表可以用下面的程式生成:

/** 

 * GB2312Unicde.java 

 * Copyright (c) 2003 by Dr. Herong Yang, http://www.herongyang.com/ 

 */ 

import java.io.*;
import java.nio.*;
import java.nio.charset.*;

/**
 * Writes a GB2312 to Unicode cross-reference table to the file
 * {@code gb2312.gb}.
 *
 * <p>For each of the 94 x 94 row/column positions the table lists the
 * four-digit Q.W. (row/column) number, the raw two-byte GB character, its
 * GB code in hex, its UTF-16BE code in hex and its UTF-8 code in hex.
 * Two positions are printed per output line.
 */
class GB2312Unicde {

   /** Destination of every write helper below; opened and closed by {@link #main}. */
   static OutputStream out = null;

   /** Upper-case hexadecimal digits, indexed by nibble value. */
   static char hexDigit[] = {'0', '1', '2', '3', '4', '5', '6', '7',
                             '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};

   /**
    * Inclusive lower bounds of the Q.W. ranges (row * 100 + column) that
    * {@link #validGB} rejects. Paired index-by-index with {@link #e_out}.
    */
   static int b_out[] = {201,267,279,293,484,587,625,657,734,782,827,
      874,901,980,5590};

   /** Inclusive upper bounds matching {@link #b_out}. */
   static int e_out[] = {216,268,280,294,494,594,632,694,748,794,836,
      894,903,994,5594};

   /**
    * Creates {@code gb2312.gb} in the working directory and fills it with
    * the table. I/O failures are reported on standard output.
    *
    * @param args ignored
    */
   public static void main(String[] args) {
      try {
         // Buffered: the helpers emit one byte per write call.
         out = new BufferedOutputStream(new FileOutputStream("gb2312.gb"));
         try {
            writeCode();
         } finally {
            // Close (and flush) even when writeCode fails part-way.
            out.close();
         }
      } catch (IOException e) {
         System.out.println(e.toString());
      }
   }

   /**
    * Writes the complete table to {@link #out}: a title for each of the 94
    * rows and, for rows that are not reserved, a header, 94 entries and a
    * footer. Prints the number of characters written to standard output.
    *
    * @throws IOException if writing to {@link #out} fails
    */
   public static void writeCode() throws IOException {
      boolean reserved = false;
      String name = null;
      // GB2312 is not supported by JDK. So I am using GBK.
      CharsetDecoder gbdc = Charset.forName("GBK").newDecoder();
      CharsetEncoder uxec = Charset.forName("UTF-16BE").newEncoder();
      CharsetEncoder u8ec = Charset.forName("UTF-8").newEncoder();
      ByteBuffer gbbb = null;
      ByteBuffer uxbb = null;
      ByteBuffer u8bb = null;
      CharBuffer cb = null;
      int count = 0;
      for (int i = 1; i <= 94; i++) {
         // Defining row settings
         if (i >= 1 && i <= 9) {
            reserved = false;
            name = "Graphic symbols";
         } else if (i >= 10 && i <= 15) {
            reserved = true;
            name = "Reserved";
         } else if (i >= 16 && i <= 55) {
            reserved = false;
            name = "Level 1 characters";
         } else if (i >= 56 && i <= 87) {
            reserved = false;
            name = "Level 2 characters";
         } else if (i >= 88 && i <= 94) {
            reserved = true;
            name = "Reserved";
         }

         // writing row title
         // NOTE(review): the markup inside these two literals was stripped
         // from the published listing; <p>...</p> is a reconstruction.
         writeln();
         writeString("<p>");
         writeNumber(i);
         writeString(" Row: " + name);
         writeln();
         writeString("</p>");
         writeln();

         if (!reserved) {
            writeln();
            writeHeader();
            // looping through all characters in one row
            for (int j = 1; j <= 94; j++) {
               byte hi = (byte) (0xA0 + i);
               byte lo = (byte) (0xA0 + j);
               if (validGB(i, j)) {
                  // getting GB, UTF-16BE, UTF-8 codes
                  gbbb = ByteBuffer.wrap(new byte[]{hi, lo});
                  try {
                     cb = gbdc.decode(gbbb);
                     uxbb = uxec.encode(cb);
                     // The first encode consumed cb; start over for UTF-8.
                     cb.rewind();
                     u8bb = u8ec.encode(cb);
                  } catch (CharacterCodingException e) {
                     // Position cannot be converted: list it as "null".
                     cb = null;
                     uxbb = null;
                     u8bb = null;
                  }
               } else {
                  cb = null;
                  uxbb = null;
                  u8bb = null;
               }
               writeNumber(i);
               writeNumber(j);
               writeString(" ");
               if (cb != null) {
                  writeByte(hi);
                  writeByte(lo);
                  writeString(" ");
                  writeHex(hi);
                  writeHex(lo);
                  count++;
               } else {
                  writeGBSpace();
                  writeString(" null");
               }
               writeString(" ");
               writeByteBuffer(uxbb, 2);
               writeString(" ");
               writeByteBuffer(u8bb, 3);
               if (j % 2 == 0) {
                  writeln();
               } else {
                  // Same three-space gap that writeHeader puts between
                  // its two column groups.
                  writeString("   ");
               }
            }
            writeFooter();
         }
      }
      System.out.println("Number of GB characters worte: " + count);
   }

   /**
    * Writes a CR LF line terminator.
    *
    * @throws IOException if writing to {@link #out} fails
    */
   public static void writeln() throws IOException {
      out.write(0x0D);
      out.write(0x0A);
   }

   /**
    * Writes one raw byte.
    *
    * @param b the byte to write
    * @throws IOException if writing to {@link #out} fails
    */
   public static void writeByte(byte b) throws IOException {
      out.write(b & 0xFF);
   }

   /**
    * Writes the bytes of {@code b} from index 0 up to its limit as hex
    * digits, then pads with two spaces for every byte short of {@code l}.
    * A {@code null} buffer is written as the text {@code null}.
    *
    * @param b the encoded bytes, or {@code null}
    * @param l the column width, in bytes
    * @throws IOException if writing to {@link #out} fails
    */
   public static void writeByteBuffer(ByteBuffer b, int l) throws IOException {
      int i = 0;
      if (b == null) {
         writeString("null");
         // "null" is as wide as two hex-encoded bytes.
         i = 2;
      } else {
         for (i = 0; i < b.limit(); i++) {
            writeHex(b.get(i));
         }
      }
      for (int j = i; j < l; j++) {
         writeString("  ");
      }
   }

   /**
    * Writes the two bytes 0xA1 0xA1 as a placeholder where a GB character
    * would otherwise be written.
    *
    * @throws IOException if writing to {@link #out} fails
    */
   public static void writeGBSpace() throws IOException {
      out.write(0xA1);
      out.write(0xA1);
   }

   /**
    * Writes the low eight bits of every character of {@code s}; does
    * nothing when {@code s} is {@code null}.
    *
    * @param s the text to write, or {@code null}
    * @throws IOException if writing to {@link #out} fails
    */
   public static void writeString(String s) throws IOException {
      if (s != null) {
         for (int i = 0; i < s.length(); i++) {
            out.write(s.charAt(i) & 0xFF);
         }
      }
   }

   /**
    * Writes the last two decimal digits of {@code i}, zero-padded on the
    * left.
    *
    * @param i the number to write
    * @throws IOException if writing to {@link #out} fails
    */
   public static void writeNumber(int i) throws IOException {
      String s = "00" + String.valueOf(i);
      writeString(s.substring(s.length() - 2));
   }

   /**
    * Writes {@code b} as two upper-case hex digits.
    *
    * @param b the byte to write
    * @throws IOException if writing to {@link #out} fails
    */
   public static void writeHex(byte b) throws IOException {
      out.write((int) hexDigit[(b >> 4) & 0x0F]);
      out.write((int) hexDigit[b & 0x0F]);
   }

   /**
    * Writes the opening markup and the two-group column header of one row.
    *
    * @throws IOException if writing to {@link #out} fails
    */
   public static void writeHeader() throws IOException {
      // NOTE(review): the markup in this literal was stripped from the
      // published listing; <pre> is a reconstruction.
      writeString("<pre>");
      writeln();
      writeString("Q.W. ");
      writeGBSpace();
      writeString(" GB   Uni. UTF-8 ");
      writeString("   ");
      writeString("Q.W. ");
      writeGBSpace();
      writeString(" GB   Uni. UTF-8 ");
      writeln();
      writeln();
   }

   /**
    * Writes the closing markup of one row.
    *
    * @throws IOException if writing to {@link #out} fails
    */
   public static void writeFooter() throws IOException {
      // NOTE(review): reconstructed literal, counterpart of writeHeader.
      writeString("</pre>");
      writeln();
   }

   /**
    * Tells whether row {@code i}, column {@code j} should be converted.
    *
    * @param i the row number
    * @param j the column number
    * @return {@code false} when {@code i * 100 + j} lies inside any
    *         inclusive {@link #b_out}/{@link #e_out} range, else {@code true}
    */
   public static boolean validGB(int i, int j) {
      int code = i * 100 + j;
      for (int l = 0; l < b_out.length; l++) {
         if (code >= b_out[l] && code <= e_out[l]) {
            return false;
         }
      }
      return true;
   }
}

程式输出的例表格式如下:

Q.W.   GB Uni. UTF-8 Q.W.   GB Uni. UTF-8

1601 啊 B0A1 554A E5958A 1602 阿 B0A2 963F E998BF

1603 埃 B0A3 57C3 E59F83 1604 挨 B0A4 6328 E68CA8

1605 哎 B0A5 54CE E5938E 1606 唉 B0A6 5509 E59489

1607 哀 B0A7 54C0 E59380 1608 皑 B0A8 7691 E79A91

1609 癌 B0A9 764C E7998C 1610 蔼 B0AA 853C E894BC

1611 矮 B0AB 77EE E79FAE 1612 艾 B0AC 827E E889BE

1613 碍 B0AD 788D E7A28D 1614 爱 B0AE 7231 E788B1

1615 隘 B0AF 9698 E99A98 1616 鞍 B0B0 978D E99E8D

1617 氨 B0B1 6C28 E6B0A8 1618 安 B0B2 5B89 E5AE89

1619 俺 B0B3 4FFA E4BFBA 1620 按 B0B4 6309 E68C89

1621 暗 B0B5 6697 E69A97 1622 岸 B0B6 5CB8 E5B2B8

1623 胺 B0B7 80FA E883BA 1624 案 B0B8 6848 E6A188

1625 肮 B0B9 80AE E882AE 1626 昂 B0BA 6602 E69882

1627 盎 B0BB 76CE E79B8E 1628 凹 B0BC 51F9 E587B9

1629 敖 B0BD 6556 E69596 1630 熬 B0BE 71AC E786AC

1631 翱 B0BF 7FF1 E7BFB1 1632 袄 B0C0 8884 E8A284

1633 傲 B0C1 50B2 E582B2 1634 奥 B0C2 5965 E5A5A5

1635 懊 B0C3 61CA E6878A 1636 澳 B0C4 6FB3 E6BEB3



相关文章


j2me创意--Wap浏览器的源代码
关于JAVA匿名内部类的一点讨论
在JAVA中实现图形界面退出
JAVA进阶:提高代码可重用性的三个措施
从GB2312到Unicode转换表制作程式
JSP显示内容缓存技巧
Sun新SPARC企业服务器与虚拟化方案发布
使用BEAWorkshop开发ZK应用—安装篇
配置BEAWebLogic8.1JDBC连接
澳大利亚华人论坛
考好网
日本华人论坛
华人移民留学论坛
英国华人论坛