1use crate::{DecodeError, Decoder, EncodeError, Encoder};
2
3pub const UTF32_NAME: &str = "UTF-32";
5
6#[derive(Debug, Default)]
8pub struct UTF32Encoder {
9 init: bool,
10}
11impl Encoder for UTF32Encoder {
12 fn name(&self) -> &'static str {
13 UTF32_NAME
14 }
15
16 fn encode(
17 &mut self,
18 src: &str,
19 dst: &mut [u8],
20 finish: bool,
21 ) -> Result<(usize, usize), EncodeError> {
22 if src.is_empty() {
23 return Err(EncodeError::InputIsEmpty);
24 }
25 if dst.len() < 4 {
26 return Err(EncodeError::OutputTooShort);
27 }
28
29 if !self.init {
30 self.init = true;
31 dst[0] = 0xFF;
33 dst[1] = 0xFE;
34 dst[2] = 0x00;
35 dst[3] = 0x00;
36 return Ok((0, 4));
37 }
38 UTF32LEEncoder.encode(src, dst, finish)
39 }
40}
41
42pub struct UTF32Decoder {
44 read: usize,
45 top: [u8; 4],
46 be: bool,
47}
48impl Decoder for UTF32Decoder {
49 fn name(&self) -> &'static str {
50 UTF32_NAME
51 }
52
53 fn decode(
54 &mut self,
55 mut src: &[u8],
56 dst: &mut String,
57 finish: bool,
58 ) -> Result<(usize, usize), DecodeError> {
59 if src.is_empty() {
60 return Err(DecodeError::InputIsEmpty);
61 }
62 if dst.capacity() - dst.len() < 4 {
63 return Err(DecodeError::OutputTooShort);
64 }
65
66 let mut base = 0;
67 if self.read < 4 {
68 let orig = src.len();
69 while self.read < 4 && !src.is_empty() {
70 self.top[self.read] = src[0];
71 src = &src[1..];
72 self.read += 1;
73 }
74 base = orig - src.len();
75 if self.read == 4 {
76 if matches!(self.top[..], [0xFF, 0xFE, 0x00, 0x00]) {
79 self.be = false;
80 return Ok((base, 0));
81 } else if matches!(self.top[..], [0x00, 0x00, 0xFE, 0xFF]) {
82 self.be = true;
83 return Ok((base, 0));
84 } else {
85 self.be = true;
86 };
89 } else {
90 return Ok((base, 0));
91 }
92 }
93
94 if self.be && !matches!(self.top[..], [0x00, 0x00, 0xFE, 0xFF]) {
95 let codepoint = u32::from_be_bytes(self.top);
96 let mut write = 0;
97 match char::from_u32(codepoint) {
98 Some(c) => {
99 write += c.len_utf8();
100 dst.push(c);
101 }
102 None => {
103 return Err(DecodeError::Malformed {
106 read: 4,
107 write,
108 length: 4,
109 offset: 0,
110 });
111 }
112 }
113
114 self.top = [0x00, 0x00, 0xFE, 0xFF];
115 return Ok((base, write));
116 }
117
118 if self.be {
119 UTF32BEDecoder.decode(src, dst, finish)
120 } else {
121 UTF32LEDecoder.decode(src, dst, finish)
122 }
123 }
124}
125
126impl Default for UTF32Decoder {
127 fn default() -> Self {
128 Self {
129 read: 0,
130 top: [0; 4],
131 be: true,
132 }
133 }
134}
135
136pub const UTF32BE_NAME: &str = "UTF-32BE";
138
139pub struct UTF32BEEncoder;
141impl Encoder for UTF32BEEncoder {
142 fn name(&self) -> &'static str {
143 UTF32BE_NAME
144 }
145
146 fn encode(
147 &mut self,
148 src: &str,
149 mut dst: &mut [u8],
150 _finish: bool,
151 ) -> Result<(usize, usize), EncodeError> {
152 if src.is_empty() {
153 return Err(EncodeError::InputIsEmpty);
154 }
155 if dst.len() < 4 {
156 return Err(EncodeError::OutputTooShort);
157 }
158
159 let mut read = 0;
160 let mut write = 0;
161 for c in src.chars() {
162 read += c.len_utf8();
163 dst[..4].copy_from_slice(&(c as u32).to_be_bytes()[..]);
164 dst = &mut dst[4..];
165 write += 4;
166 if dst.len() < 4 {
167 break;
168 }
169 }
170 Ok((read, write))
171 }
172}
173
174pub struct UTF32BEDecoder;
176impl Decoder for UTF32BEDecoder {
177 fn name(&self) -> &'static str {
178 UTF32BE_NAME
179 }
180
181 fn decode(
182 &mut self,
183 src: &[u8],
184 dst: &mut String,
185 finish: bool,
186 ) -> Result<(usize, usize), DecodeError> {
187 if src.is_empty() {
188 return Err(DecodeError::InputIsEmpty);
189 }
190 let cap = dst.capacity() - dst.len();
191 if cap < 4 {
192 return Err(DecodeError::OutputTooShort);
193 }
194
195 let mut read = 0;
196 let mut write = 0;
197 for bytes in src.chunks_exact(4) {
198 read += 4;
199 let codepoint = u32::from_be_bytes(bytes.try_into().unwrap());
200 match char::from_u32(codepoint) {
201 Some(c) => {
202 write += c.len_utf8();
203 dst.push(c);
204 }
205 None => {
206 return Err(DecodeError::Malformed {
207 read,
208 write,
209 length: 4,
210 offset: 0,
211 });
212 }
213 }
214 if dst.capacity() - dst.len() < 4 {
215 break;
216 }
217 }
218
219 let rem = src.len() - read;
220 if finish && rem < 4 && rem != 0 && dst.capacity() - dst.len() >= 4 {
221 return Err(DecodeError::Malformed {
222 read: src.len(),
223 write,
224 length: src.len() - read,
225 offset: 0,
226 });
227 }
228
229 Ok((read, write))
230 }
231}
232
233pub const UTF32LE_NAME: &str = "UTF-32LE";
235
236pub struct UTF32LEEncoder;
238impl Encoder for UTF32LEEncoder {
239 fn name(&self) -> &'static str {
240 UTF32LE_NAME
241 }
242
243 fn encode(
244 &mut self,
245 src: &str,
246 mut dst: &mut [u8],
247 _finish: bool,
248 ) -> Result<(usize, usize), EncodeError> {
249 if src.is_empty() {
250 return Err(EncodeError::InputIsEmpty);
251 }
252 if dst.len() < 4 {
253 return Err(EncodeError::OutputTooShort);
254 }
255
256 let mut read = 0;
257 let mut write = 0;
258 for c in src.chars() {
259 read += c.len_utf8();
260 dst[..4].copy_from_slice(&(c as u32).to_le_bytes()[..]);
261 dst = &mut dst[4..];
262 write += 4;
263 if dst.len() < 4 {
264 break;
265 }
266 }
267 Ok((read, write))
268 }
269}
270
271pub struct UTF32LEDecoder;
273impl Decoder for UTF32LEDecoder {
274 fn name(&self) -> &'static str {
275 UTF32LE_NAME
276 }
277
278 fn decode(
279 &mut self,
280 src: &[u8],
281 dst: &mut String,
282 finish: bool,
283 ) -> Result<(usize, usize), DecodeError> {
284 if src.is_empty() {
285 return Err(DecodeError::InputIsEmpty);
286 }
287 let cap = dst.capacity() - dst.len();
288 if cap < 4 {
289 return Err(DecodeError::OutputTooShort);
290 }
291
292 let mut read = 0;
293 let mut write = 0;
294 for bytes in src.chunks_exact(4) {
295 read += 4;
296 let codepoint = u32::from_le_bytes(bytes.try_into().unwrap());
297 match char::from_u32(codepoint) {
298 Some(c) => {
299 write += c.len_utf8();
300 dst.push(c);
301 }
302 None => {
303 return Err(DecodeError::Malformed {
304 read,
305 write,
306 length: 4,
307 offset: 0,
308 });
309 }
310 }
311 if dst.capacity() - dst.len() < 4 {
312 break;
313 }
314 }
315
316 let rem = src.len() - read;
317 if finish && rem < 4 && rem != 0 && dst.capacity() - dst.len() >= 4 {
318 return Err(DecodeError::Malformed {
319 read: src.len(),
320 write,
321 length: src.len() - read,
322 offset: 0,
323 });
324 }
325
326 Ok((read, write))
327 }
328}
329
330const UCS4_UNUSUAL_2143_NAME: &str = "UCS4-UNUSUAL-2143";
332
333pub struct UCS4Unusual2143Encoder;
334impl Encoder for UCS4Unusual2143Encoder {
335 fn name(&self) -> &'static str {
336 UCS4_UNUSUAL_2143_NAME
337 }
338
339 fn encode(
340 &mut self,
341 src: &str,
342 mut dst: &mut [u8],
343 _finish: bool,
344 ) -> Result<(usize, usize), EncodeError> {
345 if src.is_empty() {
346 return Err(EncodeError::InputIsEmpty);
347 }
348 if dst.len() < 4 {
349 return Err(EncodeError::OutputTooShort);
350 }
351
352 let mut read = 0;
353 let mut write = 0;
354 for c in src.chars() {
355 read += c.len_utf8();
356 let bytes = (c as u32).to_be_bytes();
357 dst[0] = bytes[1];
358 dst[1] = bytes[0];
359 dst[2] = bytes[3];
360 dst[3] = bytes[2];
361 dst = &mut dst[4..];
362 write += 4;
363 if dst.len() < 4 {
364 break;
365 }
366 }
367 Ok((read, write))
368 }
369}
370
371pub struct UCS4Unusual2143Decoder;
372impl Decoder for UCS4Unusual2143Decoder {
373 fn name(&self) -> &'static str {
374 UCS4_UNUSUAL_2143_NAME
375 }
376
377 fn decode(
378 &mut self,
379 src: &[u8],
380 dst: &mut String,
381 finish: bool,
382 ) -> Result<(usize, usize), DecodeError> {
383 if src.is_empty() {
384 return Err(DecodeError::InputIsEmpty);
385 }
386 let cap = dst.capacity() - dst.len();
387 if cap < 4 {
388 return Err(DecodeError::OutputTooShort);
389 }
390
391 let mut read = 0;
392 let mut write = 0;
393 for bytes in src.chunks_exact(4) {
394 read += 4;
395 let codepoint = u32::from_le_bytes([bytes[2], bytes[3], bytes[0], bytes[1]]);
396 match char::from_u32(codepoint) {
397 Some(c) => {
398 write += c.len_utf8();
399 dst.push(c);
400 }
401 None => {
402 return Err(DecodeError::Malformed {
403 read,
404 write,
405 length: 4,
406 offset: 0,
407 });
408 }
409 }
410 if dst.capacity() - dst.len() < 4 {
411 break;
412 }
413 }
414
415 let rem = src.len() - read;
416 if finish && rem < 4 && rem != 0 && dst.capacity() - dst.len() >= 4 {
417 return Err(DecodeError::Malformed {
418 read: src.len(),
419 write,
420 length: src.len() - read,
421 offset: 0,
422 });
423 }
424
425 Ok((read, write))
426 }
427}
428
429const UCS4_UNUSUAL_3412_NAME: &str = "UCS4-UNUSUAL-3412";
431
432pub struct UCS4Unusual3412Encoder;
433impl Encoder for UCS4Unusual3412Encoder {
434 fn name(&self) -> &'static str {
435 UCS4_UNUSUAL_3412_NAME
436 }
437
438 fn encode(
439 &mut self,
440 src: &str,
441 mut dst: &mut [u8],
442 _finish: bool,
443 ) -> Result<(usize, usize), EncodeError> {
444 if src.is_empty() {
445 return Err(EncodeError::InputIsEmpty);
446 }
447 if dst.len() < 4 {
448 return Err(EncodeError::OutputTooShort);
449 }
450
451 let mut read = 0;
452 let mut write = 0;
453 for c in src.chars() {
454 read += c.len_utf8();
455 let bytes = (c as u32).to_be_bytes();
456 dst[0] = bytes[2];
457 dst[1] = bytes[3];
458 dst[2] = bytes[0];
459 dst[3] = bytes[1];
460 dst = &mut dst[4..];
461 write += 4;
462 if dst.len() < 4 {
463 break;
464 }
465 }
466 Ok((read, write))
467 }
468}
469
470pub struct UCS4Unusual3412Decoder;
471impl Decoder for UCS4Unusual3412Decoder {
472 fn name(&self) -> &'static str {
473 UCS4_UNUSUAL_3412_NAME
474 }
475
476 fn decode(
477 &mut self,
478 src: &[u8],
479 dst: &mut String,
480 finish: bool,
481 ) -> Result<(usize, usize), DecodeError> {
482 if src.is_empty() {
483 return Err(DecodeError::InputIsEmpty);
484 }
485 let cap = dst.capacity() - dst.len();
486 if cap < 4 {
487 return Err(DecodeError::OutputTooShort);
488 }
489
490 let mut read = 0;
491 let mut write = 0;
492 for bytes in src.chunks_exact(4) {
493 read += 4;
494 let codepoint = u32::from_le_bytes([bytes[1], bytes[0], bytes[2], bytes[3]]);
495 match char::from_u32(codepoint) {
496 Some(c) => {
497 write += c.len_utf8();
498 dst.push(c);
499 }
500 None => {
501 return Err(DecodeError::Malformed {
502 read,
503 write,
504 length: 4,
505 offset: 0,
506 });
507 }
508 }
509 if dst.capacity() - dst.len() < 4 {
510 break;
511 }
512 }
513
514 let rem = src.len() - read;
515 if finish && rem < 4 && rem != 0 && dst.capacity() - dst.len() >= 4 {
516 return Err(DecodeError::Malformed {
517 read: src.len(),
518 write,
519 length: src.len() - read,
520 offset: 0,
521 });
522 }
523
524 Ok((read, write))
525 }
526}