1use crate::{DecodeError, Decoder, EncodeError, Encoder};
2
3pub const UTF32_NAME: &str = "UTF-32";
4
5#[derive(Debug, Default)]
6pub struct UTF32Encoder {
7 init: bool,
8}
9impl Encoder for UTF32Encoder {
10 fn name(&self) -> &'static str {
11 UTF32_NAME
12 }
13
14 fn encode(
15 &mut self,
16 src: &str,
17 dst: &mut [u8],
18 finish: bool,
19 ) -> Result<(usize, usize), EncodeError> {
20 if src.is_empty() {
21 return Err(EncodeError::InputIsEmpty);
22 }
23 if dst.len() < 4 {
24 return Err(EncodeError::OutputTooShort);
25 }
26
27 if !self.init {
28 self.init = true;
29 dst[0] = 0xFF;
31 dst[1] = 0xFE;
32 dst[2] = 0x00;
33 dst[3] = 0x00;
34 return Ok((0, 4));
35 }
36 UTF32LEEncoder.encode(src, dst, finish)
37 }
38}
39
40pub struct UTF32Decoder {
41 read: usize,
42 top: [u8; 4],
43 be: bool,
44}
45impl Decoder for UTF32Decoder {
46 fn name(&self) -> &'static str {
47 UTF32_NAME
48 }
49
50 fn decode(
51 &mut self,
52 mut src: &[u8],
53 dst: &mut String,
54 finish: bool,
55 ) -> Result<(usize, usize), DecodeError> {
56 if src.is_empty() {
57 return Err(DecodeError::InputIsEmpty);
58 }
59 if dst.capacity() - dst.len() < 4 {
60 return Err(DecodeError::OutputTooShort);
61 }
62
63 let mut base = 0;
64 if self.read < 4 {
65 let orig = src.len();
66 while self.read < 4 && !src.is_empty() {
67 self.top[self.read] = src[0];
68 src = &src[1..];
69 self.read += 1;
70 }
71 base = orig - src.len();
72 if self.read == 4 {
73 if matches!(self.top[..], [0xFF, 0xFE, 0x00, 0x00]) {
76 self.be = false;
77 return Ok((base, 0));
78 } else if matches!(self.top[..], [0x00, 0x00, 0xFE, 0xFF]) {
79 self.be = true;
80 return Ok((base, 0));
81 } else {
82 self.be = true;
83 };
86 } else {
87 return Ok((base, 0));
88 }
89 }
90
91 if self.be && !matches!(self.top[..], [0x00, 0x00, 0xFE, 0xFF]) {
92 let codepoint = u32::from_be_bytes(self.top);
93 let mut write = 0;
94 match char::from_u32(codepoint) {
95 Some(c) => {
96 write += c.len_utf8();
97 dst.push(c);
98 }
99 None => {
100 return Err(DecodeError::Malformed {
103 read: 4,
104 write,
105 length: 4,
106 offset: 0,
107 });
108 }
109 }
110
111 self.top = [0x00, 0x00, 0xFE, 0xFF];
112 return Ok((base, write));
113 }
114
115 if self.be {
116 UTF32BEDecoder.decode(src, dst, finish)
117 } else {
118 UTF32LEDecoder.decode(src, dst, finish)
119 }
120 }
121}
122
123impl Default for UTF32Decoder {
124 fn default() -> Self {
125 Self {
126 read: 0,
127 top: [0; 4],
128 be: true,
129 }
130 }
131}
132
133pub const UTF32BE_NAME: &str = "UTF-32BE";
134
135pub struct UTF32BEEncoder;
136impl Encoder for UTF32BEEncoder {
137 fn name(&self) -> &'static str {
138 UTF32BE_NAME
139 }
140
141 fn encode(
142 &mut self,
143 src: &str,
144 mut dst: &mut [u8],
145 _finish: bool,
146 ) -> Result<(usize, usize), EncodeError> {
147 if src.is_empty() {
148 return Err(EncodeError::InputIsEmpty);
149 }
150 if dst.len() < 4 {
151 return Err(EncodeError::OutputTooShort);
152 }
153
154 let mut read = 0;
155 let mut write = 0;
156 for c in src.chars() {
157 read += c.len_utf8();
158 dst[..4].copy_from_slice(&(c as u32).to_be_bytes()[..]);
159 dst = &mut dst[4..];
160 write += 4;
161 if dst.len() < 4 {
162 break;
163 }
164 }
165 Ok((read, write))
166 }
167}
168
169pub struct UTF32BEDecoder;
170impl Decoder for UTF32BEDecoder {
171 fn name(&self) -> &'static str {
172 UTF32BE_NAME
173 }
174
175 fn decode(
176 &mut self,
177 src: &[u8],
178 dst: &mut String,
179 finish: bool,
180 ) -> Result<(usize, usize), DecodeError> {
181 if src.is_empty() {
182 return Err(DecodeError::InputIsEmpty);
183 }
184 let cap = dst.capacity() - dst.len();
185 if cap < 4 {
186 return Err(DecodeError::OutputTooShort);
187 }
188
189 let mut read = 0;
190 let mut write = 0;
191 for bytes in src.chunks_exact(4) {
192 read += 4;
193 let codepoint = u32::from_be_bytes(bytes.try_into().unwrap());
194 match char::from_u32(codepoint) {
195 Some(c) => {
196 write += c.len_utf8();
197 dst.push(c);
198 }
199 None => {
200 return Err(DecodeError::Malformed {
201 read,
202 write,
203 length: 4,
204 offset: 0,
205 });
206 }
207 }
208 if dst.capacity() - dst.len() < 4 {
209 break;
210 }
211 }
212
213 let rem = src.len() - read;
214 if finish && rem < 4 && rem != 0 && dst.capacity() - dst.len() >= 4 {
215 return Err(DecodeError::Malformed {
216 read: src.len(),
217 write,
218 length: src.len() - read,
219 offset: 0,
220 });
221 }
222
223 Ok((read, write))
224 }
225}
226
227pub const UTF32LE_NAME: &str = "UTF-32LE";
228
229pub struct UTF32LEEncoder;
230impl Encoder for UTF32LEEncoder {
231 fn name(&self) -> &'static str {
232 UTF32LE_NAME
233 }
234
235 fn encode(
236 &mut self,
237 src: &str,
238 mut dst: &mut [u8],
239 _finish: bool,
240 ) -> Result<(usize, usize), EncodeError> {
241 if src.is_empty() {
242 return Err(EncodeError::InputIsEmpty);
243 }
244 if dst.len() < 4 {
245 return Err(EncodeError::OutputTooShort);
246 }
247
248 let mut read = 0;
249 let mut write = 0;
250 for c in src.chars() {
251 read += c.len_utf8();
252 dst[..4].copy_from_slice(&(c as u32).to_le_bytes()[..]);
253 dst = &mut dst[4..];
254 write += 4;
255 if dst.len() < 4 {
256 break;
257 }
258 }
259 Ok((read, write))
260 }
261}
262
263pub struct UTF32LEDecoder;
264impl Decoder for UTF32LEDecoder {
265 fn name(&self) -> &'static str {
266 UTF32LE_NAME
267 }
268
269 fn decode(
270 &mut self,
271 src: &[u8],
272 dst: &mut String,
273 finish: bool,
274 ) -> Result<(usize, usize), DecodeError> {
275 if src.is_empty() {
276 return Err(DecodeError::InputIsEmpty);
277 }
278 let cap = dst.capacity() - dst.len();
279 if cap < 4 {
280 return Err(DecodeError::OutputTooShort);
281 }
282
283 let mut read = 0;
284 let mut write = 0;
285 for bytes in src.chunks_exact(4) {
286 read += 4;
287 let codepoint = u32::from_le_bytes(bytes.try_into().unwrap());
288 match char::from_u32(codepoint) {
289 Some(c) => {
290 write += c.len_utf8();
291 dst.push(c);
292 }
293 None => {
294 return Err(DecodeError::Malformed {
295 read,
296 write,
297 length: 4,
298 offset: 0,
299 });
300 }
301 }
302 if dst.capacity() - dst.len() < 4 {
303 break;
304 }
305 }
306
307 let rem = src.len() - read;
308 if finish && rem < 4 && rem != 0 && dst.capacity() - dst.len() >= 4 {
309 return Err(DecodeError::Malformed {
310 read: src.len(),
311 write,
312 length: src.len() - read,
313 offset: 0,
314 });
315 }
316
317 Ok((read, write))
318 }
319}
320
321const UCS4_UNUSUAL_2143_NAME: &str = "UCS4-UNUSUAL-2143";
323
324pub struct UCS4Unusual2143Encoder;
325impl Encoder for UCS4Unusual2143Encoder {
326 fn name(&self) -> &'static str {
327 UCS4_UNUSUAL_2143_NAME
328 }
329
330 fn encode(
331 &mut self,
332 src: &str,
333 mut dst: &mut [u8],
334 _finish: bool,
335 ) -> Result<(usize, usize), EncodeError> {
336 if src.is_empty() {
337 return Err(EncodeError::InputIsEmpty);
338 }
339 if dst.len() < 4 {
340 return Err(EncodeError::OutputTooShort);
341 }
342
343 let mut read = 0;
344 let mut write = 0;
345 for c in src.chars() {
346 read += c.len_utf8();
347 let bytes = (c as u32).to_be_bytes();
348 dst[0] = bytes[1];
349 dst[1] = bytes[0];
350 dst[2] = bytes[3];
351 dst[3] = bytes[2];
352 dst = &mut dst[4..];
353 write += 4;
354 if dst.len() < 4 {
355 break;
356 }
357 }
358 Ok((read, write))
359 }
360}
361
362pub struct UCS4Unusual2143Decoder;
363impl Decoder for UCS4Unusual2143Decoder {
364 fn name(&self) -> &'static str {
365 UCS4_UNUSUAL_2143_NAME
366 }
367
368 fn decode(
369 &mut self,
370 src: &[u8],
371 dst: &mut String,
372 finish: bool,
373 ) -> Result<(usize, usize), DecodeError> {
374 if src.is_empty() {
375 return Err(DecodeError::InputIsEmpty);
376 }
377 let cap = dst.capacity() - dst.len();
378 if cap < 4 {
379 return Err(DecodeError::OutputTooShort);
380 }
381
382 let mut read = 0;
383 let mut write = 0;
384 for bytes in src.chunks_exact(4) {
385 read += 4;
386 let codepoint = u32::from_le_bytes([bytes[2], bytes[3], bytes[0], bytes[1]]);
387 match char::from_u32(codepoint) {
388 Some(c) => {
389 write += c.len_utf8();
390 dst.push(c);
391 }
392 None => {
393 return Err(DecodeError::Malformed {
394 read,
395 write,
396 length: 4,
397 offset: 0,
398 });
399 }
400 }
401 if dst.capacity() - dst.len() < 4 {
402 break;
403 }
404 }
405
406 let rem = src.len() - read;
407 if finish && rem < 4 && rem != 0 && dst.capacity() - dst.len() >= 4 {
408 return Err(DecodeError::Malformed {
409 read: src.len(),
410 write,
411 length: src.len() - read,
412 offset: 0,
413 });
414 }
415
416 Ok((read, write))
417 }
418}
419
420const UCS4_UNUSUAL_3412_NAME: &str = "UCS4-UNUSUAL-3412";
422
423pub struct UCS4Unusual3412Encoder;
424impl Encoder for UCS4Unusual3412Encoder {
425 fn name(&self) -> &'static str {
426 UCS4_UNUSUAL_3412_NAME
427 }
428
429 fn encode(
430 &mut self,
431 src: &str,
432 mut dst: &mut [u8],
433 _finish: bool,
434 ) -> Result<(usize, usize), EncodeError> {
435 if src.is_empty() {
436 return Err(EncodeError::InputIsEmpty);
437 }
438 if dst.len() < 4 {
439 return Err(EncodeError::OutputTooShort);
440 }
441
442 let mut read = 0;
443 let mut write = 0;
444 for c in src.chars() {
445 read += c.len_utf8();
446 let bytes = (c as u32).to_be_bytes();
447 dst[0] = bytes[2];
448 dst[1] = bytes[3];
449 dst[2] = bytes[0];
450 dst[3] = bytes[1];
451 dst = &mut dst[4..];
452 write += 4;
453 if dst.len() < 4 {
454 break;
455 }
456 }
457 Ok((read, write))
458 }
459}
460
461pub struct UCS4Unusual3412Decoder;
462impl Decoder for UCS4Unusual3412Decoder {
463 fn name(&self) -> &'static str {
464 UCS4_UNUSUAL_3412_NAME
465 }
466
467 fn decode(
468 &mut self,
469 src: &[u8],
470 dst: &mut String,
471 finish: bool,
472 ) -> Result<(usize, usize), DecodeError> {
473 if src.is_empty() {
474 return Err(DecodeError::InputIsEmpty);
475 }
476 let cap = dst.capacity() - dst.len();
477 if cap < 4 {
478 return Err(DecodeError::OutputTooShort);
479 }
480
481 let mut read = 0;
482 let mut write = 0;
483 for bytes in src.chunks_exact(4) {
484 read += 4;
485 let codepoint = u32::from_le_bytes([bytes[1], bytes[0], bytes[2], bytes[3]]);
486 match char::from_u32(codepoint) {
487 Some(c) => {
488 write += c.len_utf8();
489 dst.push(c);
490 }
491 None => {
492 return Err(DecodeError::Malformed {
493 read,
494 write,
495 length: 4,
496 offset: 0,
497 });
498 }
499 }
500 if dst.capacity() - dst.len() < 4 {
501 break;
502 }
503 }
504
505 let rem = src.len() - read;
506 if finish && rem < 4 && rem != 0 && dst.capacity() - dst.len() >= 4 {
507 return Err(DecodeError::Malformed {
508 read: src.len(),
509 write,
510 length: src.len() - read,
511 offset: 0,
512 });
513 }
514
515 Ok((read, write))
516 }
517}