1    
2    /*
3    
4      The author of this software is Ian Kaplan
5      Bear Products International
6      www.bearcave.com
7      iank@bearcave.com
8    
9      Copyright (c) Ian Kaplan, 1999, 2000
10   
11     See copyright file for usage and licensing
12   
13   */
14   
15   package jconst;
16   
17   import java.io.*;
18   import util.*;
19   
20   /*
21    * constUtf8
22    *
23   
24       CONSTANT_Utf8_info {
25          u1 tag;
26          u2 len;
27          u1 bytes[len];
28       }
29   
30      See section 4.4.7 of the Java Virtual Machine Specification for a
31      description of multi-byte character representation.  Bit twiddling
32      in Java Dudes and Dudettes.
33   
34    */
35   public class constUtf8 extends constBase {
36     private String str = null;
37     
38     public void read( DataInputStream dStream ) {
39       StringBuffer strBuf;
40       int len, charCnt;
41       byte one_byte;
42       char one_char;
43   
44       one_char = '\u0000';
45       len = readU2( dStream );
46       strBuf = new StringBuffer();
47       charCnt = 0;
48       while (charCnt < len) {
49         one_byte = (byte)readU1( dStream );
50         charCnt++;
51         if ((one_byte >> 7) == 1) {
52   	short tmp;
53   
54   	// its a multi-byte character
55   	tmp = (short)(one_byte & 0x3f);  // Bits 5..0 (six bits)
56   	// read the next byte
57   	one_byte = (byte)readU1( dStream );
58   	charCnt++;
59   	tmp = (short)(tmp | ((one_byte & 0x3f) << 6));
60   	if ((one_byte >> 6) == 0x2) {
61   	  // We have 12 bits so far, get bits 15..12
62   	  one_byte = (byte)readU1( dStream );
63   	  charCnt++;
64   	  one_byte = (byte)(one_byte & 0xf);
65   	  tmp = (short)(tmp | (one_byte << 12));
66   	}
67   	one_char = (char)tmp;
68         }
69         else {
70   	one_char = (char)one_byte;
71         }
72         strBuf.append(one_char);
73       } // while
74       str = strBuf.toString();
75     } // read
76   
77   
78     /**
79        Return the Utf8 string in ASCII format.  Any characters
80        which are outside the ASCII printable range are represented
81        as either back-slash escapes or as \\uxxxx strings (e.g.,
82        hex form of the unicode character.
83      */
84     public String toAsciiString() {
85       String retStr = null;
86   
87       if (str != null) {
88         StringBuffer strbuf = new StringBuffer();
89         int len = str.length();
90         char ch;
91   
92         for (int i = 0; i < len; i++) {
93   	// standard non-graphic printable 
94   	// ASCII range is ' ' (0x20) to '~' (7E)
95   	ch = str.charAt(i);
96   	if (ch >= ' ' && ch <= '~')
97   	  strbuf.append( ch );
98   	else {
99   	  String tmp;
100  
101  	  tmp = null;
102  	  if (ch == '\b')
103  	    tmp = "\\b";
104  	  else if (ch == '\t')
105  	    tmp = "\\t";
106  	  else if (ch == '\n')
107  	    tmp = "\\n";
108  	  else if (ch == '\f')
109  	    tmp = "\\f";
110  	  else if (ch == '\r')
111  	    tmp = "\\r";
112  	  else 
113  	    tmp = "\\u" + Integer.toHexString( (int)ch );
114  	  strbuf.append( tmp );
115  	}
116        } // for
117        retStr = strbuf.toString();
118      }
119      return retStr;
120    } // toAsciiString
121  
122  
123    /**
124      Print a Utf8 String in ASCII format.  Characters
125      which are outside the ASCII range are printed
126      as hex values in \\uxxxx format.
127  
128     */
129    public void pr() {
130      System.out.print( toAsciiString() );
131    } // pr
132  
133  
134    public void pr_data() {
135      int len = str.length();
136  
137      System.out.println("end offset = " + getBytesRead() );
138      System.out.println( str );
139      for (int i = 0; i < str.length(); i++) {
140        System.out.print( Integer.toHexString( (int)str.charAt(i) ) + " " );
141      }
142      System.out.println();
143    } // pr_data
144  
145  
146    /**
147      Return the raw Utf8 string, without any translation.
148      */
149    public String getString() {
150      return str;
151    }
152  
153  
154    /**
155      Return a printable version of the Utf8 string.
156      */
157    public String getPrintableString() {
158      return toAsciiString();
159    }
160  
161  } // constUtf8
162