1#![no_std]
28
29extern crate alloc;
30
31use alloc::string::String;
32use alloc::vec::Vec;
33
/// The six 7-bit group values that are never written as a plain output byte.
/// A value's position in this array is the index carried by an escape lead byte.
const ILLEGALS: [u8; 6] = [0, 10, 13, 34, 38, 92];
/// Index value (all three index bits set) that marks a "shortened" escape,
/// i.e. an escape pair that carries only a single 7-bit group.
const SHORTENED: u8 = 0b111;
/// Bit 7 of every byte lane of a `u64`.
const ASCII_MASK_8: u64 = 0x8080_8080_8080_8080;
/// Low seven bits of every byte lane of a `u64`.
const LO7_8: u64 = 0x7F7F_7F7F_7F7F_7F7F;
/// Byte 0 broadcast to all eight lanes (XOR with it is the identity; kept so
/// the comparison code treats every illegal value uniformly).
const Z0_MASK: u64 = 0x0000_0000_0000_0000;
/// Byte 10 (`\n`) broadcast to all eight lanes.
const Z10_MASK: u64 = 0x0A0A_0A0A_0A0A_0A0A;
/// Byte 13 (`\r`) broadcast to all eight lanes.
const Z13_MASK: u64 = 0x0D0D_0D0D_0D0D_0D0D;
/// Byte 34 (`"`) broadcast to all eight lanes.
const Z34_MASK: u64 = 0x2222_2222_2222_2222;
/// Byte 38 (`&`) broadcast to all eight lanes.
const Z38_MASK: u64 = 0x2626_2626_2626_2626;
/// Byte 92 (`\`) broadcast to all eight lanes.
const Z92_MASK: u64 = 0x5C5C_5C5C_5C5C_5C5C;
44
/// Classification of every 7-bit value: `0` means the value may be emitted
/// verbatim, `1..=6` is one plus the rank of the value among the six values
/// that must be escaped (0, 10, 13, 34, 38, 92).
const CLASS: [u8; 128] = {
    const RESERVED: [usize; 6] = [0, 10, 13, 34, 38, 92];

    let mut table = [0u8; 128];
    let mut rank = 0usize;
    while rank < RESERVED.len() {
        table[RESERVED[rank]] = rank as u8 + 1;
        rank += 1;
    }
    table
};
55
56const ESCAPE_TABLE: [[u16; 128]; 7] = {
57 let mut table = [[0u16; 128]; 7];
58 let mut idx = 0usize;
59 while idx < 6 {
60 let mut next = 0usize;
61 while next < 128 {
62 let n = next as u8;
63 let b1 = 0b1100_0010 | (idx as u8) << 2 | (n >> 6);
64 let b2 = 0x80 | (n & 0x3F);
65 table[idx][next] = u16::from_le_bytes([b1, b2]);
66 next += 1;
67 }
68 idx += 1;
69 }
70 let mut next = 0usize;
71 while next < 128 {
72 let n = next as u8;
73 let b1 = 0b1100_0010 | (SHORTENED << 2) | (n >> 6);
74 let b2 = 0x80 | (n & 0x3F);
75 table[6][next] = u16::from_le_bytes([b1, b2]);
76 next += 1;
77 }
78 table
79};
80
81const LEAD_DECODE: [u8; 256] = {
82 let mut arr = [0xFFu8; 256];
83
84 let mut idx = 0usize;
85 while idx < 6 {
86 let mut bit = 0usize;
87 while bit < 2 {
88 let lead = 0b1100_0010 | ((idx as u8) << 2) | (bit as u8);
89 arr[lead as usize] = ((idx as u8) << 1) | (bit as u8);
90 bit += 1;
91 }
92 idx += 1;
93 }
94
95 let mut bit = 0usize;
96 while bit < 2 {
97 let lead = 0b1100_0010 | (SHORTENED << 2) | (bit as u8);
98 arr[lead as usize] = (SHORTENED << 1) | (bit as u8);
99 bit += 1;
100 }
101
102 arr
103};
104
/// Writes `value` to `dst` as eight little-endian bytes, with no alignment
/// requirement.
///
/// # Safety
/// `dst` must be valid for writing 8 bytes.
#[inline(always)]
unsafe fn store_u64_le(dst: *mut u8, value: u64) {
    let bytes = value.to_le_bytes();
    // SAFETY: the caller guarantees 8 writable bytes; `[u8; 8]` has alignment 1.
    unsafe { dst.cast::<[u8; 8]>().write(bytes) };
}
109
/// Writes `value` to `dst` as two little-endian bytes, with no alignment
/// requirement.
///
/// # Safety
/// `dst` must be valid for writing 2 bytes.
#[inline(always)]
unsafe fn store_u16_le(dst: *mut u8, value: u16) {
    let bytes = value.to_le_bytes();
    // SAFETY: the caller guarantees 2 writable bytes; `[u8; 2]` has alignment 1.
    unsafe { dst.cast::<[u8; 2]>().write(bytes) };
}
114
/// Writes the low `len` bytes of `value` to `dst` in big-endian order.
///
/// The store is always a full 8 bytes wide: the `len` meaningful bytes come
/// first and the remaining `8 - len` bytes are written as zero.
///
/// # Safety
/// `dst` must be valid for writing 8 bytes, regardless of `len`.
#[inline(always)]
unsafe fn store_be_partial(dst: *mut u8, value: u64, len: usize) {
    debug_assert!(len > 0 && len <= 8);
    // Move the meaningful bytes to the top so they are emitted first.
    let unused_bits = (8 - len) * 8;
    let bytes = (value << unused_bits).to_be_bytes();
    // SAFETY: the caller guarantees 8 writable bytes; `[u8; 8]` has alignment 1.
    unsafe { dst.cast::<[u8; 8]>().write(bytes) };
}
121
/// Reads eight bytes at `ptr` as a little-endian `u64`, with no alignment
/// requirement.
///
/// # Safety
/// `ptr` must be valid for reading 8 bytes.
#[inline(always)]
unsafe fn load_u64_le(ptr: *const u8) -> u64 {
    // SAFETY: the caller guarantees 8 readable bytes; `[u8; 8]` has alignment 1.
    let raw = unsafe { ptr.cast::<[u8; 8]>().read() };
    u64::from_le_bytes(raw)
}
126
/// Returns the seven bytes at `ptr` as a big-endian 56-bit integer.
///
/// Implemented as one 8-byte load whose last byte is discarded, so one byte
/// beyond the seven of interest is read.
///
/// # Safety
/// `ptr` must be valid for reading 8 bytes.
#[inline(always)]
unsafe fn load56_be_overread1(ptr: *const u8) -> u64 {
    // SAFETY: the caller guarantees 8 readable bytes; `[u8; 8]` has alignment 1.
    let window = unsafe { ptr.cast::<[u8; 8]>().read() };
    u64::from_be_bytes(window) >> 8
}
131
/// Returns the seven bytes at `ptr` as a big-endian 56-bit integer, touching
/// exactly seven bytes of memory.
///
/// This is the variant to use when the seven bytes sit at the very end of a
/// buffer. It never reads an eighth byte, which would be out of bounds there,
/// and it produces the same value on little- and big-endian hosts.
///
/// # Safety
/// `ptr` must be valid for reading 7 bytes.
#[inline(always)]
unsafe fn load56_be_exact(ptr: *const u8) -> u64 {
    // Leave the most significant byte zero and fill the remaining seven, so the
    // big-endian interpretation is already the 56-bit value (no shift needed).
    let mut window = [0u8; 8];
    // SAFETY: the caller guarantees 7 readable bytes at `ptr`; the destination
    // is the 7-byte tail of a local 8-byte array and cannot overlap the source.
    unsafe { core::ptr::copy_nonoverlapping(ptr, window.as_mut_ptr().add(1), 7) };
    u64::from_be_bytes(window)
}
148
/// Splits a 56-bit value into eight 7-bit groups, one per byte lane.
///
/// The most significant seven bits (bits 49..=55) land in the lowest byte lane
/// and the least significant seven bits in the highest lane, so a
/// little-endian store of the result emits the groups in stream order. Bit 7
/// of every lane is zero.
#[inline(always)]
fn split56_to_groups_le(bits56: u64) -> u64 {
    // `_pdep_u64` lives in `core::arch::x86_64` only, so the fast path must be
    // limited to x86_64 (32-bit x86 has neither the module nor the intrinsic).
    #[cfg(all(target_arch = "x86_64", target_feature = "bmi2"))]
    {
        // SAFETY: BMI2 is statically enabled for this compilation.
        let scattered = unsafe { core::arch::x86_64::_pdep_u64(bits56, LO7_8) };
        // PDEP puts the lowest group in lane 0; reverse the lanes.
        return scattered.swap_bytes();
    }
    // Portable path; unreachable only when the BMI2 block above is compiled in.
    #[allow(unreachable_code)]
    {
        ((bits56 >> 49) & 0x7F)
            | (((bits56 >> 42) & 0x7F) << 8)
            | (((bits56 >> 35) & 0x7F) << 16)
            | (((bits56 >> 28) & 0x7F) << 24)
            | (((bits56 >> 21) & 0x7F) << 32)
            | (((bits56 >> 14) & 0x7F) << 40)
            | (((bits56 >> 7) & 0x7F) << 48)
            | ((bits56 & 0x7F) << 56)
    }
}
171
/// Packs eight 7-bit groups (one per byte lane of `chunk_le`) into a 56-bit
/// value. The lowest byte lane becomes the most significant seven bits.
///
/// Only the low seven bits of each lane are used; bit 7 of every lane is
/// ignored on both the BMI2 and the portable path.
#[inline(always)]
fn gather_groups_to_bits56(chunk_le: u64) -> u64 {
    // `_pext_u64` lives in `core::arch::x86_64` only, so the fast path must be
    // limited to x86_64 (32-bit x86 has neither the module nor the intrinsic).
    #[cfg(all(target_arch = "x86_64", target_feature = "bmi2"))]
    {
        // SAFETY: BMI2 is statically enabled for this compilation.
        return unsafe { core::arch::x86_64::_pext_u64(chunk_le.swap_bytes(), LO7_8) };
    }
    // Portable path; unreachable only when the BMI2 block above is compiled in.
    #[allow(unreachable_code)]
    {
        // Mask with 0x7F (not 0xFF) so a stray high bit cannot bleed into the
        // neighbouring group, matching what PEXT does with the `LO7_8` mask.
        let g0 = chunk_le & 0x7F;
        let g1 = (chunk_le >> 8) & 0x7F;
        let g2 = (chunk_le >> 16) & 0x7F;
        let g3 = (chunk_le >> 24) & 0x7F;
        let g4 = (chunk_le >> 32) & 0x7F;
        let g5 = (chunk_le >> 40) & 0x7F;
        let g6 = (chunk_le >> 48) & 0x7F;
        let g7 = (chunk_le >> 56) & 0x7F;

        (g0 << 49) | (g1 << 42) | (g2 << 35) | (g3 << 28) | (g4 << 21) | (g5 << 14) | (g6 << 7) | g7
    }
}
195
196#[inline(always)]
197fn illegal_high_mask(x: u64) -> u64 {
198 let z0 = x ^ Z0_MASK;
199 let z10 = x ^ Z10_MASK;
200 let z13 = x ^ Z13_MASK;
201 let z34 = x ^ Z34_MASK;
202 let z38 = x ^ Z38_MASK;
203 let z92 = x ^ Z92_MASK;
204
205 let t0 = !z0.wrapping_add(LO7_8);
206 let t10 = !z10.wrapping_add(LO7_8);
207 let t13 = !z13.wrapping_add(LO7_8);
208 let t34 = !z34.wrapping_add(LO7_8);
209 let t38 = !z38.wrapping_add(LO7_8);
210 let t92 = !z92.wrapping_add(LO7_8);
211
212 (t0 | t10 | t13 | t34 | t38 | t92) & ASCII_MASK_8
213}
214
215#[inline(always)]
216unsafe fn emit_escape_pair(out_ptr: *mut u8, out_pos: &mut usize, first: u8, next: u8) {
217 let class = unsafe { *CLASS.get_unchecked(first as usize) };
218 debug_assert!(class >= 1 && class <= 6);
219 let idx = (class - 1) as usize;
220 let pair = unsafe { *ESCAPE_TABLE.get_unchecked(idx).get_unchecked(next as usize) };
221 unsafe { store_u16_le(out_ptr.add(*out_pos), pair) };
222 *out_pos += 2;
223}
224
225#[inline(always)]
226unsafe fn emit_shortened(out_ptr: *mut u8, out_pos: &mut usize, bits: u8) {
227 let pair = unsafe { *ESCAPE_TABLE.get_unchecked(6).get_unchecked(bits as usize) };
228 unsafe { store_u16_le(out_ptr.add(*out_pos), pair) };
229 *out_pos += 2;
230}
231
/// Pulls the next 7-bit group out of `tail`, using `acc`/`acc_bits` as a bit
/// reservoir that persists between calls and `pos` as the read cursor.
///
/// Returns `Some(group)` while bits remain. When fewer than seven bits are
/// left and the input is exhausted, the remaining bits are returned
/// left-aligned (padded with zero bits on the right) and the reservoir is
/// emptied. Returns `None` once nothing is left.
#[inline(always)]
fn pull7_tail(tail: &[u8], pos: &mut usize, acc: &mut u64, acc_bits: &mut u32) -> Option<u8> {
    // Top up the reservoir until a full group is available or input runs out.
    while *acc_bits < 7 {
        match tail.get(*pos) {
            Some(&byte) => {
                *acc = (*acc << 8) | u64::from(byte);
                *pos += 1;
                *acc_bits += 8;
            }
            None => break,
        }
    }

    match *acc_bits {
        0 => None,
        partial if partial < 7 => {
            // Final, incomplete group: left-align and zero-pad.
            let group = ((*acc << (7 - partial)) & 0x7F) as u8;
            *acc = 0;
            *acc_bits = 0;
            Some(group)
        }
        available => {
            let left = available - 7;
            let group = ((*acc >> left) & 0x7F) as u8;
            // Keep only the bits that have not been handed out yet.
            *acc &= (1_u64 << left) - 1;
            *acc_bits = left;
            Some(group)
        }
    }
}
258
/// Number of 7-bit groups needed to carry `input_len` bytes, i.e.
/// `ceil(input_len * 8 / 7)`, computed with saturating arithmetic.
#[inline(always)]
fn group_count(input_len: usize) -> usize {
    match input_len {
        0 => 0,
        bytes => bytes.saturating_mul(8).saturating_add(6) / 7,
    }
}
267
268#[inline(always)]
269fn encoded_capacity(input_len: usize) -> usize {
270 group_count(input_len).saturating_mul(2).saturating_add(8)
271}
272
/// Output buffer size reserved for decoding `encoded_len` bytes: seven bits
/// per input byte rounded up to whole bytes, plus 8 bytes of slack,
/// saturating at `usize::MAX`.
#[inline(always)]
fn decoded_capacity(encoded_len: usize) -> usize {
    let payload_bits = encoded_len.saturating_mul(7);
    let whole_bytes = payload_bits.saturating_add(7) / 8;
    whole_bytes.saturating_add(8)
}
281
/// Emits the eight 7-bit groups held in the byte lanes of `groups_le`
/// (lowest lane first), escaping the lanes flagged in `illegal_mask`.
///
/// * `illegal_mask` has bit 7 set in every lane whose group must be escaped.
/// * If `*has_pending_illegal` is set on entry, the first lane is consumed as
///   the second half of the escape for `*pending_illegal_bits`.
/// * If the last remaining lane is illegal it cannot be paired inside this
///   word; it is stored in `*pending_illegal_bits`, `*has_pending_illegal` is
///   set, and the caller must finish the escape.
///
/// Runs of plain groups are written with a full 8-byte store while `*out_pos`
/// is advanced only by the number of valid bytes; later writes overwrite the
/// surplus.
///
/// # Safety
/// `out_ptr.add(*out_pos)` must be valid for writing 8 bytes at every store
/// performed here, and every lane of `groups_le` must hold a value below 128.
#[inline(always)]
unsafe fn process_groups8_masked(
    mut groups_le: u64,
    mut illegal_mask: u64,
    out_ptr: *mut u8,
    out_pos: &mut usize,
    pending_illegal_bits: &mut u8,
    has_pending_illegal: &mut bool,
) {
    let mut remaining = 8usize;

    // Finish an escape left open by the previous word: whatever the first
    // group is (illegal or not), it becomes the escape's payload.
    if *has_pending_illegal {
        let next = groups_le as u8;
        unsafe { emit_escape_pair(out_ptr, out_pos, *pending_illegal_bits, next) };
        *has_pending_illegal = false;
        groups_le >>= 8;
        illegal_mask >>= 8;
        remaining -= 1;
    }

    // Nothing (left) to escape: one wide store covers all remaining groups.
    if illegal_mask == 0 {
        unsafe { store_u64_le(out_ptr.add(*out_pos), groups_le) };
        *out_pos += remaining;
        return;
    }

    // Exactly one flagged lane (mask is a power of two): handled without a loop.
    if (illegal_mask & (illegal_mask - 1)) == 0 {
        // The flag sits at bit 8*k + 7, so `trailing_zeros >> 3` is the lane index k.
        let prefix = (illegal_mask.trailing_zeros() >> 3) as usize;
        if prefix != 0 {
            unsafe { store_u64_le(out_ptr.add(*out_pos), groups_le) };
            *out_pos += prefix;
            groups_le >>= prefix * 8;
            remaining -= prefix;
        }

        let cur = groups_le as u8;
        if remaining > 1 {
            let next = (groups_le >> 8) as u8;
            unsafe { emit_escape_pair(out_ptr, out_pos, cur, next) };
            groups_le >>= 16;
            remaining -= 2;

            if remaining != 0 {
                unsafe { store_u64_le(out_ptr.add(*out_pos), groups_le) };
                *out_pos += remaining;
            }
        } else {
            // The illegal group is the last lane; its partner comes later.
            *pending_illegal_bits = cur;
            *has_pending_illegal = true;
        }
        return;
    }

    // General case: alternate between plain runs and escape pairs.
    while remaining != 0 {
        if illegal_mask == 0 {
            unsafe { store_u64_le(out_ptr.add(*out_pos), groups_le) };
            *out_pos += remaining;
            break;
        }

        let prefix = (illegal_mask.trailing_zeros() >> 3) as usize;
        if prefix != 0 {
            unsafe { store_u64_le(out_ptr.add(*out_pos), groups_le) };
            *out_pos += prefix;
            groups_le >>= prefix * 8;
            illegal_mask >>= prefix * 8;
            remaining -= prefix;
        }

        let cur = groups_le as u8;
        if remaining > 1 {
            let next = (groups_le >> 8) as u8;
            unsafe { emit_escape_pair(out_ptr, out_pos, cur, next) };
            groups_le >>= 16;
            // Shifting by 16 also drops the partner's flag: a group carried as
            // escape payload is never escaped itself.
            illegal_mask >>= 16;
            remaining -= 2;
        } else {
            *pending_illegal_bits = cur;
            *has_pending_illegal = true;
            break;
        }
    }
}
365
/// Encodes `data` as a UTF-8 `String`.
///
/// The input is consumed as a stream of 7-bit groups. A group is written as a
/// single byte below 128 unless it is one of the illegal values
/// (0, 10, 13, 34, 38, 92); an illegal group is written together with the
/// group that follows it as one two-byte escape sequence. If an illegal group
/// is the very last group, a "shortened" escape carrying only that group is
/// written instead. An empty input yields an empty string.
pub fn encode(data: &[u8]) -> String {
    if data.is_empty() {
        return String::new();
    }

    // The capacity includes slack because bulk stores always write a full
    // `u64` even when fewer bytes are logically appended.
    let mut out = Vec::<u8>::with_capacity(encoded_capacity(data.len()));
    let out_ptr = out.as_mut_ptr();

    let len = data.len();
    let ptr = data.as_ptr();

    let mut out_pos = 0usize;
    let mut i = 0usize;
    // An illegal group that ended a word and still waits for its partner.
    let mut pending_illegal_bits = 0u8;
    let mut has_pending_illegal = false;

    // 28 input bytes (4 x 7) per iteration. Each load reads 8 bytes, so the
    // last one touches byte `i + 28`; hence the `i + 29 <= len` bound.
    while i + 29 <= len {
        let bits56_a = unsafe { load56_be_overread1(ptr.add(i)) };
        let bits56_b = unsafe { load56_be_overread1(ptr.add(i + 7)) };
        let bits56_c = unsafe { load56_be_overread1(ptr.add(i + 14)) };
        let bits56_d = unsafe { load56_be_overread1(ptr.add(i + 21)) };

        let groups_a = split56_to_groups_le(bits56_a);
        let groups_b = split56_to_groups_le(bits56_b);
        let groups_c = split56_to_groups_le(bits56_c);
        let groups_d = split56_to_groups_le(bits56_d);

        let mask_a = illegal_high_mask(groups_a);
        let mask_b = illegal_high_mask(groups_b);
        let mask_c = illegal_high_mask(groups_c);
        let mask_d = illegal_high_mask(groups_d);

        // Fast path: no escapes needed and none pending — 32 plain bytes.
        if !has_pending_illegal && (mask_a | mask_b | mask_c | mask_d) == 0 {
            unsafe {
                store_u64_le(out_ptr.add(out_pos), groups_a);
                store_u64_le(out_ptr.add(out_pos + 8), groups_b);
                store_u64_le(out_ptr.add(out_pos + 16), groups_c);
                store_u64_le(out_ptr.add(out_pos + 24), groups_d);
            }
            out_pos += 32;
            i += 28;
            continue;
        }

        unsafe {
            process_groups8_masked(
                groups_a,
                mask_a,
                out_ptr,
                &mut out_pos,
                &mut pending_illegal_bits,
                &mut has_pending_illegal,
            );
            process_groups8_masked(
                groups_b,
                mask_b,
                out_ptr,
                &mut out_pos,
                &mut pending_illegal_bits,
                &mut has_pending_illegal,
            );
            process_groups8_masked(
                groups_c,
                mask_c,
                out_ptr,
                &mut out_pos,
                &mut pending_illegal_bits,
                &mut has_pending_illegal,
            );
            process_groups8_masked(
                groups_d,
                mask_d,
                out_ptr,
                &mut out_pos,
                &mut pending_illegal_bits,
                &mut has_pending_illegal,
            );
        }
        i += 28;
    }

    // 14 input bytes per iteration; the second load touches byte `i + 14`.
    while i + 15 <= len {
        let bits56_a = unsafe { load56_be_overread1(ptr.add(i)) };
        let bits56_b = unsafe { load56_be_overread1(ptr.add(i + 7)) };

        let groups_a = split56_to_groups_le(bits56_a);
        let groups_b = split56_to_groups_le(bits56_b);

        let mask_a = illegal_high_mask(groups_a);
        let mask_b = illegal_high_mask(groups_b);

        if !has_pending_illegal && (mask_a | mask_b) == 0 {
            unsafe {
                store_u64_le(out_ptr.add(out_pos), groups_a);
                store_u64_le(out_ptr.add(out_pos + 8), groups_b);
            }
            out_pos += 16;
            i += 14;
            continue;
        }

        unsafe {
            process_groups8_masked(
                groups_a,
                mask_a,
                out_ptr,
                &mut out_pos,
                &mut pending_illegal_bits,
                &mut has_pending_illegal,
            );
            process_groups8_masked(
                groups_b,
                mask_b,
                out_ptr,
                &mut out_pos,
                &mut pending_illegal_bits,
                &mut has_pending_illegal,
            );
        }
        i += 14;
    }

    // 7 input bytes per iteration while an 8-byte load is still in bounds.
    while i + 8 <= len {
        let bits56 = unsafe { load56_be_overread1(ptr.add(i)) };
        let groups_le = split56_to_groups_le(bits56);
        let mask = illegal_high_mask(groups_le);

        if !has_pending_illegal && mask == 0 {
            unsafe { store_u64_le(out_ptr.add(out_pos), groups_le) };
            out_pos += 8;
            i += 7;
            continue;
        }

        unsafe {
            process_groups8_masked(
                groups_le,
                mask,
                out_ptr,
                &mut out_pos,
                &mut pending_illegal_bits,
                &mut has_pending_illegal,
            );
        }
        i += 7;
    }

    // Exactly seven bytes left: same processing, but through the load variant
    // meant for the end of the buffer.
    if i + 7 <= len {
        let bits56 = unsafe { load56_be_exact(ptr.add(i)) };
        let groups_le = split56_to_groups_le(bits56);
        let mask = illegal_high_mask(groups_le);

        if !has_pending_illegal && mask == 0 {
            unsafe { store_u64_le(out_ptr.add(out_pos), groups_le) };
            out_pos += 8;
            i += 7;
        } else {
            unsafe {
                process_groups8_masked(
                    groups_le,
                    mask,
                    out_ptr,
                    &mut out_pos,
                    &mut pending_illegal_bits,
                    &mut has_pending_illegal,
                );
            }
            i += 7;
        }
    }

    // Scalar tail: fewer than seven bytes remain, pulled one group at a time.
    let tail = &data[i..];
    let mut tail_pos = 0usize;
    let mut acc = 0u64;
    let mut acc_bits = 0u32;

    // Close an escape left open by the bulk loops, pairing it with the first
    // tail group or, if there is none, writing a shortened escape.
    if has_pending_illegal {
        if let Some(nb) = pull7_tail(tail, &mut tail_pos, &mut acc, &mut acc_bits) {
            unsafe { emit_escape_pair(out_ptr, &mut out_pos, pending_illegal_bits, nb) };
        } else {
            unsafe { emit_shortened(out_ptr, &mut out_pos, pending_illegal_bits) };
        }
    }

    while let Some(cur) = pull7_tail(tail, &mut tail_pos, &mut acc, &mut acc_bits) {
        let class = unsafe { *CLASS.get_unchecked(cur as usize) };
        if class == 0 {
            unsafe { *out_ptr.add(out_pos) = cur };
            out_pos += 1;
        } else if let Some(nb) = pull7_tail(tail, &mut tail_pos, &mut acc, &mut acc_bits) {
            unsafe { emit_escape_pair(out_ptr, &mut out_pos, cur, nb) };
        } else {
            // Illegal group with nothing after it.
            unsafe { emit_shortened(out_ptr, &mut out_pos, cur) };
            break;
        }
    }

    // Only bytes below 128 and two-byte sequences from `ESCAPE_TABLE`
    // (lead `0b110x_xx1x`, continuation `0b10xx_xxxx`) were written.
    unsafe {
        out.set_len(out_pos);
        String::from_utf8_unchecked(out)
    }
}
586
587#[inline(always)]
588unsafe fn unpack8groups_chunk_le(
589 chunk_le: u64,
590 out_ptr: *mut u8,
591 out_pos: &mut usize,
592 acc: &mut u64,
593 acc_bits: &mut u32,
594) {
595 let bits56 = gather_groups_to_bits56(chunk_le);
596 let k = *acc_bits;
597
598 let combined = (*acc << 56) | bits56;
599 unsafe { store_be_partial(out_ptr.add(*out_pos), combined >> k, 7) };
600 *out_pos += 7;
601 *acc = combined & ((1u64 << k).wrapping_sub(1));
602}
603
604#[inline(always)]
605unsafe fn push_ascii_prefix_le(
606 mut chunk_le: u64,
607 count: usize,
608 out_ptr: *mut u8,
609 out_pos: &mut usize,
610 acc: &mut u64,
611 acc_bits: &mut u32,
612) {
613 debug_assert!(count <= 7);
614
615 let mut packed = 0u64;
616 let mut n = 0usize;
617 while n < count {
618 packed = (packed << 7) | ((chunk_le as u8) as u64);
619 chunk_le >>= 8;
620 n += 1;
621 }
622 let add_bits = (count as u32) * 7;
623 let total_bits = *acc_bits + add_bits;
624 let new_bits = total_bits & 7;
625 let emitted = (total_bits >> 3) as usize;
626 let combined = (*acc << add_bits) | packed;
627
628 if emitted != 0 {
629 unsafe { store_be_partial(out_ptr.add(*out_pos), combined >> new_bits, emitted) };
630 *out_pos += emitted;
631 }
632
633 *acc_bits = new_bits;
634 *acc = combined & ((1u64 << new_bits).wrapping_sub(1));
635}
636
/// Appends one 7-bit group to the bit accumulator and writes one output byte
/// if at least eight bits are then available.
///
/// # Safety
/// `out_ptr.add(*out_pos)` must be valid for writing 1 byte.
#[inline(always)]
unsafe fn push7_scalar(
    out_ptr: *mut u8,
    out_pos: &mut usize,
    acc: &mut u64,
    acc_bits: &mut u32,
    bits: u8,
) {
    let widened = (*acc << 7) | u64::from(bits);
    let available = *acc_bits + 7;

    // Not enough for a byte yet: just remember the bits.
    if available < 8 {
        *acc = widened;
        *acc_bits = available;
        return;
    }

    let leftover = available - 8;
    // SAFETY: the caller guarantees one writable byte at the cursor.
    unsafe { out_ptr.add(*out_pos).write((widened >> leftover) as u8) };
    *out_pos += 1;
    *acc = widened & (1u64 << leftover).wrapping_sub(1);
    *acc_bits = leftover;
}
659
/// Appends two 7-bit groups (`hi7` first, then `lo7`) to the bit accumulator
/// and writes the one or two output bytes that become complete.
///
/// # Safety
/// `out_ptr.add(*out_pos)` must be valid for writing 2 bytes.
#[inline(always)]
unsafe fn push14_scalar(
    out_ptr: *mut u8,
    out_pos: &mut usize,
    acc: &mut u64,
    acc_bits: &mut u32,
    hi7: u8,
    lo7: u8,
) {
    let widened = (*acc << 14) | (u64::from(hi7) << 7) | u64::from(lo7);
    let available = *acc_bits + 14;
    let leftover = available & 7;
    let ready = widened >> leftover;
    let target = unsafe { out_ptr.add(*out_pos) };

    if available >> 3 == 2 {
        // SAFETY: the caller guarantees two writable bytes; `[u8; 2]` has alignment 1.
        unsafe { target.cast::<[u8; 2]>().write((ready as u16).to_be_bytes()) };
        *out_pos += 2;
    } else {
        // SAFETY: the caller guarantees at least one writable byte.
        unsafe { target.write(ready as u8) };
        *out_pos += 1;
    }

    *acc = widened & (1u64 << leftover).wrapping_sub(1);
    *acc_bits = leftover;
}
686
/// Builds the `Err` value for a decoding failure.
///
/// Marked cold and never inlined so the error paths stay out of the hot
/// decode loops.
#[cold]
#[inline(never)]
fn decode_err<T>(msg: &'static str) -> Result<T, &'static str> {
    Result::Err(msg)
}
692
693#[inline(always)]
694unsafe fn break_to_scalar(
695 ptr: *const u8,
696 i: &mut usize,
697 len: usize,
698 out_ptr: *mut u8,
699 out_pos: &mut usize,
700 acc: &mut u64,
701 acc_bits: &mut u32,
702) -> Result<(), &'static str> {
703 if *i + 2 > len {
704 return decode_err("Unexpected end of input");
705 }
706 let b1 = unsafe { *ptr.add(*i) };
707 let code = LEAD_DECODE[b1 as usize];
708 if code == 0xFF {
709 return decode_err("Invalid lead byte");
710 }
711 let b2 = unsafe { *ptr.add(*i + 1) };
712 if (b2 & 0xC0) != 0x80 {
713 return decode_err("Invalid continuation byte");
714 }
715 *i += 2;
716 let illegal_index = code >> 1;
717 let first_bit = code & 1;
718 let next = (first_bit << 6) | (b2 & 0x3F);
719
720 unsafe {
721 if illegal_index < 6 {
722 push14_scalar(
723 out_ptr,
724 out_pos,
725 acc,
726 acc_bits,
727 ILLEGALS[illegal_index as usize],
728 next,
729 );
730 } else {
731 debug_assert_eq!(illegal_index, SHORTENED);
732 push7_scalar(out_ptr, out_pos, acc, acc_bits, next);
733 }
734 }
735 Ok(())
736}
737
/// Decodes a string produced by `encode` back into bytes.
///
/// Every byte below 128 contributes one 7-bit group. Every two-byte escape
/// contributes either an illegal value plus one following group, or (for the
/// shortened form) a single group. Groups are concatenated and written out as
/// whole bytes; bits left over at the end that do not fill a byte are dropped.
///
/// # Errors
/// Returns `"Invalid lead byte"`, `"Invalid continuation byte"` or
/// `"Unexpected end of input"` when the input contains a byte sequence that is
/// not a plain byte or a well-formed escape pair.
pub fn decode(encoded: &str) -> Result<Vec<u8>, &'static str> {
    if encoded.is_empty() {
        return Ok(Vec::new());
    }

    let bytes = encoded.as_bytes();
    let len = bytes.len();
    // The capacity includes slack because the bulk paths store a full `u64`
    // even when fewer bytes are logically appended.
    let mut out = Vec::<u8>::with_capacity(decoded_capacity(len));
    let out_ptr = out.as_mut_ptr();

    let ptr = bytes.as_ptr();
    let mut out_pos = 0usize;
    // Bit reservoir: the low `acc_bits` bits of `acc` are not yet written out.
    let mut acc = 0u64;
    let mut acc_bits = 0u32;
    let mut i = 0usize;

    // Bulk loop: inspect eight input bytes at a time.
    while i + 8 <= len {
        let chunk = unsafe { load_u64_le(ptr.add(i)) };
        let high = chunk & ASCII_MASK_8;

        // All eight bytes are plain groups: 8 groups -> 7 output bytes.
        if high == 0 {
            unsafe {
                unpack8groups_chunk_le(chunk, out_ptr, &mut out_pos, &mut acc, &mut acc_bits);
            }
            i += 8;
            continue;
        }

        // Number of plain bytes before the first byte with its high bit set.
        let ascii_prefix = (high.trailing_zeros() >> 3) as usize;
        if ascii_prefix != 0 {
            unsafe {
                push_ascii_prefix_le(
                    chunk,
                    ascii_prefix,
                    out_ptr,
                    &mut out_pos,
                    &mut acc,
                    &mut acc_bits,
                );
            }
            i += ascii_prefix;
            continue;
        }

        // The chunk starts with an escape pair: decode just that pair.
        (unsafe {
            break_to_scalar(
                ptr,
                &mut i,
                len,
                out_ptr,
                &mut out_pos,
                &mut acc,
                &mut acc_bits,
            )
        })?;
    }

    // Scalar tail: fewer than eight bytes remain.
    while i < len {
        let b = unsafe { *ptr.add(i) };
        if b < 128 {
            i += 1;
            unsafe { push7_scalar(out_ptr, &mut out_pos, &mut acc, &mut acc_bits, b) };
            continue;
        }

        let code = LEAD_DECODE[b as usize];
        if code == 0xFF {
            return decode_err("Invalid lead byte");
        }

        if i + 1 >= len {
            return decode_err("Unexpected end of input");
        }

        let b2 = unsafe { *ptr.add(i + 1) };
        if (b2 & 0xC0) != 0x80 {
            return decode_err("Invalid continuation byte");
        }

        i += 2;

        // `code` is `(index << 1) | bit6`; the continuation byte supplies the
        // low six bits of the carried group.
        let illegal_index = code >> 1;
        let first_bit = code & 1;
        let next = (first_bit << 6) | (b2 & 0x3F);

        unsafe {
            if illegal_index < 6 {
                push14_scalar(
                    out_ptr,
                    &mut out_pos,
                    &mut acc,
                    &mut acc_bits,
                    ILLEGALS[illegal_index as usize],
                    next,
                );
            } else {
                debug_assert_eq!(illegal_index, SHORTENED);
                push7_scalar(out_ptr, &mut out_pos, &mut acc, &mut acc_bits, next);
            }
        }
    }

    // Bits still in `acc` at this point are discarded.
    unsafe { out.set_len(out_pos) };
    Ok(out)
}
867
/// Round-trip and error-path tests for `encode` / `decode`.
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::{format, vec};

    /// Empty input maps to empty output in both directions.
    #[test]
    fn test_empty() {
        assert_eq!(encode(b""), "");
        assert_eq!(decode("").unwrap(), b"");
    }

    /// Plain ASCII text survives a round trip.
    #[test]
    fn test_hello_world() {
        let data = b"hello world";
        let enc = encode(data);
        let dec = decode(&enc).expect("decoding failed");
        assert_eq!(dec, data);
    }

    /// Every possible single-byte input survives a round trip.
    #[test]
    fn test_single_byte_values() {
        for b in 0..=255u8 {
            let data = vec![b];
            let enc = encode(&data);
            let dec = decode(&enc).expect(&format!("decoding failed for byte {}", b));
            assert_eq!(dec, data, "failed for byte {}", b);
        }
    }

    /// Round trip for lengths around the 7-, 14- and 28-byte bulk-loop
    /// thresholds used by `encode`, plus some larger sizes.
    #[test]
    fn test_various_lengths_roundtrip() {
        for len in [
            0, 1, 2, 3, 6, 7, 8, 9, 14, 15, 16, 17, 31, 32, 33, 100, 255, 256, 511, 512,
        ] {
            let data: Vec<u8> = (0..len).map(|i| (i % 251) as u8).collect();
            let enc = encode(&data);
            let dec = decode(&enc).expect("decoding failed");
            assert_eq!(dec, data, "roundtrip failed for length {}", len);
        }
    }

    /// Input made of the byte values 0, 10, 13, 34, 38 and 92.
    #[test]
    fn test_all_illegal_bytes_handling() {
        let data = b"\x00\x0A\x0D\x22\x26\x5C";
        let enc = encode(data);
        let dec = decode(&enc).expect("decoding failed");
        assert_eq!(dec, data.as_ref());
    }

    /// All 256 byte values in ascending order.
    #[test]
    fn test_mixed_content() {
        let data: Vec<u8> = (0..=255).collect();
        let enc = encode(&data);
        let dec = decode(&enc).expect("decoding failed");
        assert_eq!(dec, data);
    }

    /// All-zero input: every 7-bit group is the illegal value 0.
    #[test]
    fn test_repeated_illegal_bytes() {
        let data = vec![0u8; 100];
        let enc = encode(&data);
        let dec = decode(&enc).expect("decoding failed");
        assert_eq!(dec, data);
    }

    /// Bytes with the high bit set that are not escape leads must be rejected.
    // NOTE(review): these tests build a `String` from bytes that are not valid
    // UTF-8 via `from_utf8_unchecked`; confirm that this is acceptable here.
    #[test]
    fn test_decode_invalid_lead_byte() {
        let invalid = vec![0x80u8];
        let s = unsafe { String::from_utf8_unchecked(invalid) };
        assert!(decode(&s).is_err());
        let invalid2 = vec![0xFFu8];
        let s2 = unsafe { String::from_utf8_unchecked(invalid2) };
        assert!(decode(&s2).is_err());
    }

    /// Inputs starting with 0xC0 must be rejected, with or without a
    /// following byte.
    #[test]
    fn test_decode_truncated_escape() {
        let mut data = vec![0xC0u8];
        let s = unsafe { String::from_utf8_unchecked(data.clone()) };
        assert!(decode(&s).is_err());
        data.push(0x40);
        let s2 = unsafe { String::from_utf8_unchecked(data) };
        assert!(decode(&s2).is_err());
    }

    /// A valid lead byte followed by a byte that is not `0b10xx_xxxx` must be
    /// rejected.
    #[test]
    fn test_decode_invalid_continuation_byte() {
        let data = vec![0xC2u8, 0xFF];
        let s = unsafe { String::from_utf8_unchecked(data) };
        assert!(decode(&s).is_err());
    }

    /// A single zero byte, whose first group is illegal, round-trips.
    #[test]
    fn test_shortened_at_end() {
        let data = vec![0u8];
        let enc = encode(&data);
        let dec = decode(&enc).expect("decode failed");
        assert_eq!(dec, data);
    }

    /// Round trip of 5,000,000 random bytes (requires the `rand` crate).
    #[test]
    fn test_very_long_input() {
        use rand::Rng;
        const DATA_SIZE: usize = 5_000_000;
        let mut data = vec![0u8; DATA_SIZE];
        let mut rng = rand::rng();
        rng.fill_bytes(&mut data);
        let enc = encode(&data);
        let dec = decode(&enc).expect("decode failed");
        assert_eq!(dec, data);
    }
}