1#![cfg_attr(not(feature = "std"), no_std)]
42
43extern crate alloc;
44
45mod error;
46
47use core::cmp::Ordering;
48
49use alloc::string::String;
50use alloc::vec::Vec;
51
52pub use error::Utf8Error;
53
/// An incrementally filled byte buffer that is meant to end up as a UTF-8 `String`.
///
/// Bytes can arrive one at a time or in chunks that split a multi-byte sequence; the two
/// counters below remember how far into such a split sequence the builder currently is.
#[derive(Clone, Debug, Default)]
pub struct Utf8Builder {
    /// Every byte accepted so far, including those of a sequence that is not finished yet.
    buffer: Vec<u8>,
    /// How many bytes of the currently unfinished sequence are already in `buffer`;
    /// `0` means that no sequence is pending.
    sl: u8,
    /// Total width of the sequence that was most recently left unfinished.
    /// Only meaningful while `sl` is non-zero; it is not reset when the sequence completes.
    sel: u8,
}
63
64impl Utf8Builder {
65 #[inline]
67 pub const fn new() -> Self {
68 Utf8Builder {
69 buffer: Vec::new(),
70 sl: 0,
71 sel: 0,
72 }
73 }
74
75 #[inline]
77 pub fn with_capacity(capacity: usize) -> Self {
78 Utf8Builder {
79 buffer: Vec::with_capacity(capacity),
80 sl: 0,
81 sel: 0,
82 }
83 }
84
85 #[inline]
87 pub fn reserve(&mut self, additional: usize) {
88 self.buffer.reserve(additional);
89 }
90
91 #[inline]
93 pub fn len(&self) -> usize {
94 self.buffer.len()
95 }
96
97 #[inline]
99 pub fn is_empty(&self) -> bool {
100 self.buffer.is_empty()
101 }
102}
103
104impl Utf8Builder {
105 #[inline]
107 pub fn is_valid(&self) -> bool {
108 self.sl == 0
109 }
110
111 #[inline]
113 pub fn finalize(self) -> Result<String, Utf8Error> {
114 if self.is_valid() {
115 let s = unsafe { String::from_utf8_unchecked(self.buffer) };
116
117 Ok(s)
118 } else {
119 Err(Utf8Error)
120 }
121 }
122}
123
124impl Utf8Builder {
125 pub fn push(&mut self, b: u8) -> Result<(), Utf8Error> {
127 if self.sl == 0 {
128 let w = utf8_width::get_width(b);
129
130 match w {
131 0 => return Err(Utf8Error),
132 1 => {
133 self.buffer.push(b);
134 }
135 _ => {
136 self.buffer.push(b);
137 self.sl = 1;
138 self.sel = w as u8;
139 }
140 }
141 } else if self.sl + 1 == self.sel {
142 self.buffer.push(b);
143
144 self.sl = 0;
145 } else {
147 self.buffer.push(b);
148
149 self.sl += 1;
150 }
151
152 Ok(())
153 }
154
155 #[inline]
157 pub fn push_str(&mut self, s: &str) -> Result<(), Utf8Error> {
158 if self.sl == 0 {
159 self.buffer.extend_from_slice(s.as_bytes());
160
161 Ok(())
162 } else {
163 Err(Utf8Error)
164 }
165 }
166
167 pub fn push_char(&mut self, c: char) -> Result<(), Utf8Error> {
169 if self.sl == 0 {
170 self.buffer.reserve(4);
171
172 let len = self.buffer.len();
173
174 unsafe {
175 self.buffer.set_len(len + 4);
176 }
177
178 let c = c.encode_utf8(&mut self.buffer[len..]).len();
179
180 unsafe {
181 self.buffer.set_len(len + c);
182 }
183
184 Ok(())
185 } else {
186 Err(Utf8Error)
187 }
188 }
189
190 pub fn push_chunk(&mut self, chunk: &[u8]) -> Result<(), Utf8Error> {
192 let chunk_size = chunk.len();
193
194 if chunk_size == 0 {
195 return Ok(());
196 }
197
198 let mut e = if self.sl > 0 {
199 let r = (self.sel - self.sl) as usize;
200
201 match r.cmp(&chunk_size) {
202 Ordering::Greater => {
203 let sl = self.sl as usize;
204 let nsl = sl + chunk_size;
205
206 self.buffer.extend_from_slice(chunk);
207
208 self.sl = nsl as u8;
209
210 return Ok(());
211 }
212 Ordering::Equal => {
213 self.buffer.extend_from_slice(chunk);
214
215 self.sl = 0;
216 return Ok(());
219 }
220 Ordering::Less => {
221 self.buffer.extend_from_slice(&chunk[..r]);
222
223 self.sl = 0;
224 r
227 }
228 }
229 } else {
230 0usize
231 };
232
233 loop {
234 let w = utf8_width::get_width(chunk[e]);
235
236 if w == 0 {
237 return Err(Utf8Error);
238 }
239
240 let r = chunk_size - e;
241
242 if r >= w {
243 self.buffer.extend_from_slice(&chunk[e..e + w]);
244
245 e += w;
246
247 if e == chunk_size {
248 break;
249 }
250 } else {
251 self.buffer.extend_from_slice(&chunk[e..]);
252
253 self.sl = r as u8;
254 self.sel = w as u8;
255
256 break;
257 }
258 }
259
260 Ok(())
261 }
262}
263
264impl From<&str> for Utf8Builder {
265 #[inline]
266 fn from(s: &str) -> Self {
267 Utf8Builder {
268 buffer: s.as_bytes().to_vec(),
269 sl: 0,
270 sel: 0,
271 }
272 }
273}
274
275impl From<String> for Utf8Builder {
276 #[inline]
277 fn from(s: String) -> Self {
278 Utf8Builder {
279 buffer: s.into_bytes(),
280 sl: 0,
281 sel: 0,
282 }
283 }
284}