1 //========================================================================
2 //Copyright 2006 Mort Bay Consulting Pty. Ltd.
3 //------------------------------------------------------------------------
4 //Licensed under the Apache License, Version 2.0 (the "License");
5 //you may not use this file except in compliance with the License.
6 //You may obtain a copy of the License at
7 //http://www.apache.org/licenses/LICENSE-2.0
8 //Unless required by applicable law or agreed to in writing, software
9 //distributed under the License is distributed on an "AS IS" BASIS,
10 //WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 //See the License for the specific language governing permissions and
12 //limitations under the License.
13 //========================================================================
14
15 package org.mortbay.util;
16
/* ------------------------------------------------------------ */
/** UTF-8 StringBuffer.
 *
 * This class wraps a standard {@link java.lang.StringBuffer} and provides methods to append
 * UTF-8 encoded bytes, which are decoded into characters as they arrive.
 *
 * This class is stateful: up to 6 calls to {@link #append(byte)} may be needed before
 * a character is appended to the string buffer.
 *
 * Code points above U+FFFF are appended as a UTF-16 surrogate pair (two chars).
 * Malformed input never throws: each malformed or unrepresentable sequence is replaced
 * by a single <code>'?'</code> and remembered, so that {@link #isError()} returns true
 * until {@link #reset()} is called. Overlong encodings and encoded surrogates are
 * not rejected.
 *
 * The UTF-8 decoding is done by this class and no additional buffers or Readers are used.
 * The UTF-8 code was inspired by http://javolution.org
 *
 */
public class Utf8StringBuffer
{
    /** The decoded characters. */
    StringBuffer _buffer;
    /** Number of continuation bytes still expected for the current character. */
    int _more;
    /** Payload bits accumulated so far for the current character. */
    int _bits;
    /** True once any malformed or unrepresentable sequence has been seen. */
    boolean _errors;

    /* ------------------------------------------------------------ */
    /** Create a decoder backed by a StringBuffer of default capacity.
     */
    public Utf8StringBuffer()
    {
        _buffer=new StringBuffer();
    }

    /* ------------------------------------------------------------ */
    /** Create a decoder backed by a StringBuffer of the given capacity.
     * @param capacity initial capacity, in chars, of the wrapped StringBuffer
     */
    public Utf8StringBuffer(int capacity)
    {
        _buffer=new StringBuffer(capacity);
    }

    /* ------------------------------------------------------------ */
    /** Decode a range of UTF-8 bytes.
     * A character may be split over several calls; the partial state is kept
     * between calls.
     * @param b the array holding the bytes
     * @param offset index of the first byte to decode
     * @param length number of bytes to decode
     */
    public void append(byte[] b,int offset, int length)
    {
        int end=offset+length;
        for (int i=offset; i<end;i++)
            append(b[i]);
    }

    /* ------------------------------------------------------------ */
    /** Decode a single UTF-8 byte.
     * Nothing is appended to the buffer until the byte completes a character.
     * @param b the next byte of the UTF-8 stream
     */
    public void append(byte b)
    {
        if (_more>0)
        {
            if ((b&0xc0)==0x80)
            {
                // 10xxxxxx - the continuation byte that was expected
                _bits=(_bits<<6)|(b&0x3f);
                if (--_more==0)
                {
                    int codePoint=_bits;
                    _bits=0;
                    appendCodePoint(codePoint);
                }
                return;
            }

            // The sequence was cut short. Replace the truncated character and
            // fall through, so that the interrupting byte is decoded in its own
            // right rather than being lost.
            malformed();
        }

        if (b>=0)
        {
            // 0xxxxxxx
            _buffer.append((char)b);
        }
        else if ((b & 0xe0) == 0xc0)
        {
            //110xxxxx
            _more=1;
            _bits=b&0x1f;
        }
        else if ((b & 0xf0) == 0xe0)
        {
            //1110xxxx
            _more=2;
            _bits=b&0x0f;
        }
        else if ((b & 0xf8) == 0xf0)
        {
            //11110xxx
            _more=3;
            _bits=b&0x07;
        }
        else if ((b & 0xfc) == 0xf8)
        {
            //111110xx
            _more=4;
            _bits=b&0x03;
        }
        else if ((b & 0xfe) == 0xfc)
        {
            //1111110x
            _more=5;
            _bits=b&0x01;
        }
        else
        {
            // 10xxxxxx without a lead byte, or 0xFE/0xFF which can never
            // start a sequence
            malformed();
        }
    }

    /* ------------------------------------------------------------ */
    /** Record a malformed sequence: append the replacement character,
     * discard any partial character and set the error flag.
     */
    private void malformed()
    {
        _buffer.append('?');
        _more=0;
        _bits=0;
        _errors=true;
    }

    /* ------------------------------------------------------------ */
    /** Append a fully decoded code point as one or two UTF-16 chars.
     * Values above U+10FFFF (only reachable through 4, 5 or 6 byte forms)
     * cannot be represented and are treated as malformed.
     * @param codePoint the decoded, non negative value
     */
    private void appendCodePoint(int codePoint)
    {
        if (codePoint<0x10000)
            _buffer.append((char)codePoint);
        else if (codePoint<=0x10ffff)
        {
            // Split into high and low surrogates; a plain (char) cast would
            // silently drop the upper bits.
            int offset=codePoint-0x10000;
            _buffer.append((char)(0xd800|(offset>>10)));
            _buffer.append((char)(0xdc00|(offset&0x3ff)));
        }
        else
            malformed();
    }

    /* ------------------------------------------------------------ */
    /**
     * @return The number of chars decoded so far. Bytes of a character that is
     * not yet complete are not counted.
     */
    public int length()
    {
        return _buffer.length();
    }

    /* ------------------------------------------------------------ */
    /** Empty the buffer and clear the decoder state and the error flag.
     */
    public void reset()
    {
        _buffer.setLength(0);
        _more=0;
        _bits=0;
        _errors=false;
    }

    /* ------------------------------------------------------------ */
    /**
     * @return The wrapped StringBuffer itself, not a copy.
     */
    public StringBuffer getStringBuffer()
    {
        return _buffer;
    }

    /* ------------------------------------------------------------ */
    /**
     * @return The characters decoded so far.
     */
    public String toString()
    {
        return _buffer.toString();
    }

    /* ------------------------------------------------------------ */
    /**
     * @return True if there are non UTF-8 characters or incomplete UTF-8 characters in the buffer.
     */
    public boolean isError()
    {
        return _errors || _more>0;
    }
}