Skip to main content

base122_fast/
lib.rs

//! # Base122-Fast
//!
//! A high-performance Base122 implementation achieving throughput up to **5.6 Gbps (encoding)**
//! and **4.9 Gbps (decoding)** on modern hardware (e.g., AMD Ryzen 5 5600, single-threaded).
//!
//! Base122 is a binary-to-text encoding scheme that is significantly more space-efficient
//! than Base64, offering approximately **14% overhead** compared to Base64's 33%, while
//! remaining valid UTF-8.
//!
//! ## Key Features
//!
//! *   **High Throughput**: Optimized for Gbps-level processing.
//! *   **no_std**: Suitable for embedded systems and WASM (requires `alloc`).
//! *   **SIMD Within A Register**: Uses SWAR techniques to process multiple bytes in 64-bit registers.
//! *   **Safety**: While leveraging `unsafe` for performance, it is rigorously tested for round-trip integrity.
//!
//! ## Quick Start
//!
//! ```rust
//! let data = b"hello world";
//! let encoded = base122_fast::encode(data);
//! let decoded = base122_fast::decode(&encoded).expect("Failed to decode");
//!
//! assert_eq!(data, decoded.as_slice());
//! ```
26
27#![no_std]
28
29extern crate alloc;
30
31use alloc::string::String;
32use alloc::vec::Vec;
33
/// The six 7-bit values that are never emitted verbatim: NUL, line feed,
/// carriage return, double quote, ampersand and backslash. The position of a
/// value in this array is the index stored in an escape lead byte.
const ILLEGALS: [u8; 6] = [b'\0', b'\n', b'\r', b'"', b'&', b'\\'];
/// Marker stored in the index bits of a lead byte when the escape carries a
/// single 7-bit group instead of an illegal value followed by a group.
const SHORTENED: u8 = 7;
/// Bit 7 of each of the eight byte lanes of a `u64`; a word ANDed with this
/// mask is zero exactly when all eight bytes are below 128.
const ASCII_MASK_8: u64 = 0x80 * 0x0101_0101_0101_0101;
37
/// Per-value flag table: `0x80` for each of the six illegal 7-bit values
/// (0, 10, 13, 34, 38, 92) and `0` for every other value below 128.
const IS_ILLEGAL: [u8; 128] = {
    const FORBIDDEN: [usize; 6] = [0, 10, 13, 34, 38, 92];
    let mut flags = [0u8; 128];
    let mut k = 0usize;
    while k < FORBIDDEN.len() {
        flags[FORBIDDEN[k]] = 0x80;
        k += 1;
    }
    flags
};
48
/// Maps a 7-bit value to its escape class: `0` for values that may be written
/// verbatim, otherwise `1..=6` for the illegal values 0, 10, 13, 34, 38 and 92
/// in that order (class minus one is the index written into the lead byte).
const CLASS: [u8; 128] = {
    const FORBIDDEN: [usize; 6] = [0, 10, 13, 34, 38, 92];
    let mut classes = [0u8; 128];
    let mut k = 0usize;
    while k < FORBIDDEN.len() {
        classes[FORBIDDEN[k]] = (k as u8) + 1;
        k += 1;
    }
    classes
};
59
60const ESCAPE_TABLE: [[u16; 128]; 7] = {
61    let mut table = [[0u16; 128]; 7];
62    let mut idx = 0usize;
63    while idx < 6 {
64        let mut next = 0usize;
65        while next < 128 {
66            let n = next as u8;
67            let b1 = 0b1100_0010 | (idx as u8) << 2 | (n >> 6);
68            let b2 = 0x80 | (n & 0x3F);
69            table[idx][next] = u16::from_le_bytes([b1, b2]);
70            next += 1;
71        }
72        idx += 1;
73    }
74    let mut next = 0usize;
75    while next < 128 {
76        let n = next as u8;
77        let b1 = 0b1100_0010 | (SHORTENED << 2) | (n >> 6);
78        let b2 = 0x80 | (n & 0x3F);
79        table[6][next] = u16::from_le_bytes([b1, b2]);
80        next += 1;
81    }
82    table
83};
84
85const LEAD_DECODE: [u8; 256] = {
86    let mut arr = [0xFFu8; 256];
87
88    let mut idx = 0usize;
89    while idx < 6 {
90        let mut bit = 0usize;
91        while bit < 2 {
92            let lead = 0b1100_0010 | ((idx as u8) << 2) | (bit as u8);
93            arr[lead as usize] = ((idx as u8) << 1) | (bit as u8);
94            bit += 1;
95        }
96        idx += 1;
97    }
98
99    let mut bit = 0usize;
100    while bit < 2 {
101        let lead = 0b1100_0010 | (SHORTENED << 2) | (bit as u8);
102        arr[lead as usize] = (SHORTENED << 1) | (bit as u8);
103        bit += 1;
104    }
105
106    arr
107};
108
/// Writes `value` to `dst` as eight bytes, least significant byte first.
///
/// # Safety
///
/// `dst` must be valid for writing 8 bytes. No alignment is required.
#[inline(always)]
unsafe fn store_u64_le(dst: *mut u8, value: u64) {
    let bytes = value.to_le_bytes();
    // SAFETY: the caller guarantees 8 writable bytes; `[u8; 8]` has alignment 1.
    unsafe { dst.cast::<[u8; 8]>().write(bytes) };
}
113
/// Writes `value` to `dst` as two bytes, least significant byte first.
///
/// # Safety
///
/// `dst` must be valid for writing 2 bytes. No alignment is required.
#[inline(always)]
unsafe fn store_u16_le(dst: *mut u8, value: u16) {
    let bytes = value.to_le_bytes();
    // SAFETY: the caller guarantees 2 writable bytes; `[u8; 2]` has alignment 1.
    unsafe { dst.cast::<[u8; 2]>().write(bytes) };
}
118
/// Writes the low `len` bytes of `value` to `dst`, most significant first.
///
/// The value is left-aligned inside a 64-bit word and the whole word is
/// stored, so the `8 - len` bytes after the payload are overwritten with
/// zeros.
///
/// # Safety
///
/// `dst` must be valid for writing 8 bytes regardless of `len`, and `len`
/// must be in `1..=8`.
#[inline(always)]
unsafe fn store_be_partial(dst: *mut u8, value: u64, len: usize) {
    debug_assert!(len > 0 && len <= 8);
    let pad_bits = (8 - len) * 8;
    let bytes = (value << pad_bits).to_be_bytes();
    // SAFETY: the caller guarantees 8 writable bytes; `[u8; 8]` has alignment 1.
    unsafe { dst.cast::<[u8; 8]>().write(bytes) };
}
125
/// Reads eight bytes at `ptr` as a little-endian `u64`.
///
/// # Safety
///
/// `ptr` must be valid for reading 8 bytes. No alignment is required.
#[inline(always)]
unsafe fn load_u64_le(ptr: *const u8) -> u64 {
    // SAFETY: the caller guarantees 8 readable bytes; `[u8; 8]` has alignment 1.
    let bytes = unsafe { ptr.cast::<[u8; 8]>().read() };
    u64::from_le_bytes(bytes)
}
130
131#[inline(always)]
132unsafe fn emit_escape_pair(out_ptr: *mut u8, out_pos: &mut usize, first: u8, next: u8) {
133    let class = unsafe { *CLASS.get_unchecked(first as usize) };
134    debug_assert!(class >= 1 && class <= 6);
135    let idx = (class - 1) as usize;
136    let pair = unsafe { *ESCAPE_TABLE.get_unchecked(idx).get_unchecked(next as usize) };
137    unsafe { store_u16_le(out_ptr.add(*out_pos), pair) };
138    *out_pos += 2;
139}
140
141#[inline(always)]
142unsafe fn emit_shortened(out_ptr: *mut u8, out_pos: &mut usize, bits: u8) {
143    let pair = unsafe { *ESCAPE_TABLE.get_unchecked(6).get_unchecked(bits as usize) };
144    unsafe { store_u16_le(out_ptr.add(*out_pos), pair) };
145    *out_pos += 2;
146}
147
/// Pulls the next 7-bit group out of `tail`, most significant bits first.
///
/// `acc` holds `acc_bits` not-yet-consumed bits and `pos` is the index of the
/// next unread byte of `tail`; all three are updated in place.
///
/// Returns `None` once both the slice and the accumulator are empty. When
/// fewer than seven bits are left, they are returned left-aligned in the
/// group with zero bits filling the low end.
#[inline(always)]
fn pull7_tail(tail: &[u8], pos: &mut usize, acc: &mut u64, acc_bits: &mut u32) -> Option<u8> {
    // Refill one byte at a time until a full group is available or the input
    // runs out.
    while *acc_bits < 7 {
        if let Some(&byte) = tail.get(*pos) {
            *acc = (*acc << 8) | u64::from(byte);
            *pos += 1;
            *acc_bits += 8;
        } else {
            break;
        }
    }

    match *acc_bits {
        0 => None,
        short @ 1..=6 => {
            // Final partial group: pad on the right with zeros.
            let group = ((*acc << (7 - short)) & 0x7F) as u8;
            *acc = 0;
            *acc_bits = 0;
            Some(group)
        }
        full => {
            let left = full - 7;
            let group = ((*acc >> left) & 0x7F) as u8;
            // Keep only the `left` unconsumed low bits (none when `left` is 0).
            *acc &= (1_u64 << left) - 1;
            *acc_bits = left;
            Some(group)
        }
    }
}
174
/// Number of 7-bit groups needed to hold `input_len` bytes, i.e. `8 *
/// input_len` bits divided by 7 and rounded up. The multiplication and
/// addition saturate instead of overflowing.
#[inline(always)]
fn group_count(input_len: usize) -> usize {
    match input_len {
        0 => 0,
        bytes => bytes.saturating_mul(8).saturating_add(6) / 7,
    }
}
183
184#[inline(always)]
185fn encoded_capacity(input_len: usize) -> usize {
186    group_count(input_len).saturating_mul(2).saturating_add(8)
187}
188
/// Output buffer size, in bytes, reserved for decoding `encoded_len` input
/// bytes: seven bits per input byte rounded up to whole bytes, plus eight
/// extra bytes. All arithmetic saturates instead of overflowing.
#[inline(always)]
fn decoded_capacity(encoded_len: usize) -> usize {
    let payload_bits = encoded_len.saturating_mul(7);
    let payload_bytes = payload_bits.saturating_add(7).saturating_div(8);
    payload_bytes.saturating_add(8)
}
197
/// Returns the seven bytes at `ptr` as a big-endian 56-bit value in the low
/// bits of a `u64`.
///
/// Eight bytes are read and the last one is shifted out, so one byte beyond
/// the seven of interest is touched.
///
/// # Safety
///
/// `ptr` must be valid for reading 8 bytes. No alignment is required.
#[inline(always)]
unsafe fn load56_be_overread1(ptr: *const u8) -> u64 {
    // SAFETY: the caller guarantees 8 readable bytes; `[u8; 8]` has alignment 1.
    let bytes = unsafe { ptr.cast::<[u8; 8]>().read() };
    u64::from_be_bytes(bytes) >> 8
}
202
/// Returns the seven bytes at `ptr` as a big-endian 56-bit value in the low
/// bits of a `u64`, touching exactly those seven bytes.
///
/// This is the variant for the last full 7-byte chunk of a buffer, where an
/// eighth byte is not guaranteed to exist. The earlier non-miri path did an
/// 8-byte unaligned read there, which reads one byte out of bounds, and
/// used `swap_bytes`, which is only correct on little-endian targets. Copying
/// seven bytes into a zeroed array is sound and endian-independent.
///
/// # Safety
///
/// `ptr` must be valid for reading 7 bytes. No alignment is required.
#[inline(always)]
unsafe fn load56_be_exact(ptr: *const u8) -> u64 {
    let mut bytes = [0u8; 8];
    // SAFETY: the caller guarantees 7 readable bytes at `ptr`; `bytes` is a
    // distinct local with room for 8.
    unsafe { ptr.copy_to_nonoverlapping(bytes.as_mut_ptr(), 7) };
    // The eighth byte is the zero padding; shift it out.
    u64::from_be_bytes(bytes) >> 8
}
219
/// Spreads a 56-bit value into eight 7-bit groups, one per byte lane.
///
/// The most significant seven bits (bits 55..49) land in the lowest byte of
/// the result and the least significant seven bits in the highest byte, so a
/// little-endian store writes the groups in stream order. Bit 7 of every lane
/// is zero.
#[inline(always)]
fn split56_to_groups_le(bits56: u64) -> u64 {
    let mut lanes = 0u64;
    for lane in 0..8u32 {
        let group = (bits56 >> (49 - 7 * lane)) & 0x7F;
        lanes |= group << (8 * lane);
    }
    lanes
}
231
232#[inline(always)]
233fn illegal_high_mask(x: u64) -> u64 {
234    let b0 = IS_ILLEGAL[(x & 0x7F) as usize] as u64;
235    let b1 = IS_ILLEGAL[((x >> 8) & 0x7F) as usize] as u64;
236    let b2 = IS_ILLEGAL[((x >> 16) & 0x7F) as usize] as u64;
237    let b3 = IS_ILLEGAL[((x >> 24) & 0x7F) as usize] as u64;
238    let b4 = IS_ILLEGAL[((x >> 32) & 0x7F) as usize] as u64;
239    let b5 = IS_ILLEGAL[((x >> 40) & 0x7F) as usize] as u64;
240    let b6 = IS_ILLEGAL[((x >> 48) & 0x7F) as usize] as u64;
241    let b7 = IS_ILLEGAL[((x >> 56) & 0x7F) as usize] as u64;
242    b0 | (b1 << 8) | (b2 << 16) | (b3 << 24) | (b4 << 32) | (b5 << 40) | (b6 << 48) | (b7 << 56)
243}
244
245#[inline(always)]
246unsafe fn process_groups8_masked(
247    mut groups_le: u64,
248    mut illegal_mask: u64,
249    out_ptr: *mut u8,
250    out_pos: &mut usize,
251    pending_illegal_bits: &mut u8,
252    has_pending_illegal: &mut bool,
253) {
254    let mut remaining = 8usize;
255
256    if *has_pending_illegal {
257        let next = groups_le as u8;
258        unsafe { emit_escape_pair(out_ptr, out_pos, *pending_illegal_bits, next) };
259        *has_pending_illegal = false;
260        groups_le >>= 8;
261        illegal_mask >>= 8;
262        remaining -= 1;
263    }
264
265    while remaining != 0 {
266        if illegal_mask == 0 {
267            unsafe { store_u64_le(out_ptr.add(*out_pos), groups_le) };
268            *out_pos += remaining;
269            break;
270        }
271
272        let prefix = (illegal_mask.trailing_zeros() >> 3) as usize;
273        if prefix != 0 {
274            unsafe { store_u64_le(out_ptr.add(*out_pos), groups_le) };
275            *out_pos += prefix;
276            groups_le >>= prefix * 8;
277            illegal_mask >>= prefix * 8;
278            remaining -= prefix;
279        }
280
281        let cur = groups_le as u8;
282        if remaining > 1 {
283            let next = (groups_le >> 8) as u8;
284            unsafe { emit_escape_pair(out_ptr, out_pos, cur, next) };
285            groups_le >>= 16;
286            illegal_mask >>= 16;
287            remaining -= 2;
288        } else {
289            *pending_illegal_bits = cur;
290            *has_pending_illegal = true;
291            break;
292        }
293    }
294}
295
296/// Encodes binary data into a Base122 string.
297///
298/// The resulting string is guaranteed to be valid UTF-8 and is more compact than
299/// Base64, typically resulting in only ~14% size overhead.
300///
301/// # Performance
302///
303/// This function utilizes a fast-path execution model that scans 8-byte chunks
304/// using SWAR logic. It is optimized for long buffers where ASCII-compatible
305/// sequences are common.
306///
307/// # Examples
308///
309/// ```rust
310/// let data = b"data";
311/// let encoded = base122_fast::encode(data);
312/// assert!(!encoded.is_empty());
313/// ```
314pub fn encode(data: &[u8]) -> String {
315    if data.is_empty() {
316        return String::new();
317    }
318
319    let mut out = Vec::<u8>::with_capacity(encoded_capacity(data.len()));
320    let out_ptr = out.as_mut_ptr();
321
322    let len = data.len();
323    let ptr = data.as_ptr();
324
325    let mut out_pos = 0usize;
326    let mut i = 0usize;
327    let mut pending_illegal_bits = 0u8;
328    let mut has_pending_illegal = false;
329
330    while i + 15 <= len {
331        let bits56_a = unsafe { load56_be_overread1(ptr.add(i)) };
332        let bits56_b = unsafe { load56_be_overread1(ptr.add(i + 7)) };
333
334        let groups_a = split56_to_groups_le(bits56_a);
335        let groups_b = split56_to_groups_le(bits56_b);
336
337        let mask_a = illegal_high_mask(groups_a);
338        let mask_b = illegal_high_mask(groups_b);
339
340        if !has_pending_illegal && (mask_a | mask_b) == 0 {
341            unsafe {
342                store_u64_le(out_ptr.add(out_pos), groups_a);
343                store_u64_le(out_ptr.add(out_pos + 8), groups_b);
344            }
345            out_pos += 16;
346            i += 14;
347            continue;
348        }
349
350        unsafe {
351            process_groups8_masked(
352                groups_a,
353                mask_a,
354                out_ptr,
355                &mut out_pos,
356                &mut pending_illegal_bits,
357                &mut has_pending_illegal,
358            );
359            process_groups8_masked(
360                groups_b,
361                mask_b,
362                out_ptr,
363                &mut out_pos,
364                &mut pending_illegal_bits,
365                &mut has_pending_illegal,
366            );
367        }
368        i += 14;
369    }
370
371    while i + 8 <= len {
372        let bits56 = unsafe { load56_be_overread1(ptr.add(i)) };
373        let groups_le = split56_to_groups_le(bits56);
374        let mask = illegal_high_mask(groups_le);
375
376        if !has_pending_illegal && mask == 0 {
377            unsafe { store_u64_le(out_ptr.add(out_pos), groups_le) };
378            out_pos += 8;
379            i += 7;
380            continue;
381        }
382
383        unsafe {
384            process_groups8_masked(
385                groups_le,
386                mask,
387                out_ptr,
388                &mut out_pos,
389                &mut pending_illegal_bits,
390                &mut has_pending_illegal,
391            );
392        }
393        i += 7;
394    }
395
396    if i + 7 <= len {
397        let bits56 = unsafe { load56_be_exact(ptr.add(i)) };
398        let groups_le = split56_to_groups_le(bits56);
399        let mask = illegal_high_mask(groups_le);
400
401        if !has_pending_illegal && mask == 0 {
402            unsafe { store_u64_le(out_ptr.add(out_pos), groups_le) };
403            out_pos += 8;
404            i += 7;
405        } else {
406            unsafe {
407                process_groups8_masked(
408                    groups_le,
409                    mask,
410                    out_ptr,
411                    &mut out_pos,
412                    &mut pending_illegal_bits,
413                    &mut has_pending_illegal,
414                );
415            }
416            i += 7;
417        }
418    }
419
420    let tail = &data[i..];
421    let mut tail_pos = 0usize;
422    let mut acc = 0u64;
423    let mut acc_bits = 0u32;
424
425    if has_pending_illegal {
426        if let Some(nb) = pull7_tail(tail, &mut tail_pos, &mut acc, &mut acc_bits) {
427            unsafe { emit_escape_pair(out_ptr, &mut out_pos, pending_illegal_bits, nb) };
428        } else {
429            unsafe { emit_shortened(out_ptr, &mut out_pos, pending_illegal_bits) };
430        }
431    }
432
433    while let Some(cur) = pull7_tail(tail, &mut tail_pos, &mut acc, &mut acc_bits) {
434        let class = unsafe { *CLASS.get_unchecked(cur as usize) };
435        if class == 0 {
436            unsafe { *out_ptr.add(out_pos) = cur };
437            out_pos += 1;
438        } else if let Some(nb) = pull7_tail(tail, &mut tail_pos, &mut acc, &mut acc_bits) {
439            unsafe { emit_escape_pair(out_ptr, &mut out_pos, cur, nb) };
440        } else {
441            unsafe { emit_shortened(out_ptr, &mut out_pos, cur) };
442            break;
443        }
444    }
445
446    unsafe {
447        out.set_len(out_pos);
448        String::from_utf8_unchecked(out)
449    }
450}
451
452#[inline(always)]
453unsafe fn unpack8groups_chunk_le(
454    chunk_le: u64,
455    out_ptr: *mut u8,
456    out_pos: &mut usize,
457    acc: &mut u64,
458    acc_bits: &mut u32,
459) {
460    let g0 = (chunk_le & 0xFF) as u64;
461    let g1 = ((chunk_le >> 8) & 0xFF) as u64;
462    let g2 = ((chunk_le >> 16) & 0xFF) as u64;
463    let g3 = ((chunk_le >> 24) & 0xFF) as u64;
464    let g4 = ((chunk_le >> 32) & 0xFF) as u64;
465    let g5 = ((chunk_le >> 40) & 0xFF) as u64;
466    let g6 = ((chunk_le >> 48) & 0xFF) as u64;
467    let g7 = ((chunk_le >> 56) & 0xFF) as u64;
468
469    let bits56 = (g0 << 49)
470        | (g1 << 42)
471        | (g2 << 35)
472        | (g3 << 28)
473        | (g4 << 21)
474        | (g5 << 14)
475        | (g6 << 7)
476        | g7;
477
478    let k = *acc_bits;
479    let combined = (*acc << 56) | bits56;
480
481    unsafe { store_be_partial(out_ptr.add(*out_pos), combined >> k, 7) };
482    *out_pos += 7;
483
484    *acc = if k == 0 {
485        0
486    } else {
487        combined & ((1u64 << k) - 1)
488    };
489}
490
491#[inline(always)]
492unsafe fn push_ascii_prefix_le(
493    mut chunk_le: u64,
494    count: usize,
495    out_ptr: *mut u8,
496    out_pos: &mut usize,
497    acc: &mut u64,
498    acc_bits: &mut u32,
499) {
500    debug_assert!(count <= 7);
501
502    let mut packed = 0u64;
503    let mut n = 0usize;
504    while n < count {
505        packed = (packed << 7) | ((chunk_le as u8) as u64);
506        chunk_le >>= 8;
507        n += 1;
508    }
509
510    let add_bits = (count as u32) * 7;
511    let total_bits = *acc_bits + add_bits;
512    let new_bits = total_bits & 7;
513    let emitted = (total_bits >> 3) as usize;
514    let combined = (*acc << add_bits) | packed;
515
516    if emitted != 0 {
517        unsafe { store_be_partial(out_ptr.add(*out_pos), combined >> new_bits, emitted) };
518        *out_pos += emitted;
519    }
520
521    *acc_bits = new_bits;
522    *acc = if new_bits == 0 {
523        0
524    } else {
525        combined & ((1u64 << new_bits) - 1)
526    };
527}
528
/// Appends one 7-bit group to the decoder's bit accumulator and writes a byte
/// to the output as soon as eight bits are available.
///
/// # Safety
///
/// `out_ptr.add(*out_pos)` must be valid for writing 1 byte, `*acc_bits` must
/// be below 8 and `bits` below 128.
#[inline(always)]
unsafe fn push7_scalar(
    out_ptr: *mut u8,
    out_pos: &mut usize,
    acc: &mut u64,
    acc_bits: &mut u32,
    bits: u8,
) {
    let window = (*acc << 7) | u64::from(bits);
    let available = *acc_bits + 7;

    if available < 8 {
        // Not enough for a byte yet; just keep accumulating.
        *acc = window;
        *acc_bits = available;
        return;
    }

    let spare = available - 8;
    // SAFETY: the caller guarantees one writable byte at the current position.
    unsafe { out_ptr.add(*out_pos).write((window >> spare) as u8) };
    *out_pos += 1;
    *acc_bits = spare;
    *acc = match spare {
        0 => 0,
        n => window & ((1_u64 << n) - 1),
    };
}
555
/// Appends two 7-bit groups (`hi7` first, then `lo7`) to the decoder's bit
/// accumulator and writes the one or two bytes that become complete.
///
/// # Safety
///
/// `out_ptr.add(*out_pos)` must be valid for writing 2 bytes, `*acc_bits`
/// must be below 8, and both groups below 128.
#[inline(always)]
unsafe fn push14_scalar(
    out_ptr: *mut u8,
    out_pos: &mut usize,
    acc: &mut u64,
    acc_bits: &mut u32,
    hi7: u8,
    lo7: u8,
) {
    let window = (*acc << 14) | (u64::from(hi7) << 7) | u64::from(lo7);
    let available = *acc_bits + 14;
    let spare = available % 8;
    let ready = window >> spare;

    // With fewer than 8 carried bits, 14..=21 bits are available: one or two
    // whole bytes.
    if available / 8 == 2 {
        let [first, second] = (ready as u16).to_be_bytes();
        // SAFETY: the caller guarantees two writable bytes at the position.
        unsafe {
            out_ptr.add(*out_pos).write(first);
            out_ptr.add(*out_pos + 1).write(second);
        }
        *out_pos += 2;
    } else {
        // SAFETY: the caller guarantees at least one writable byte.
        unsafe { out_ptr.add(*out_pos).write(ready as u8) };
        *out_pos += 1;
    }

    *acc_bits = spare;
    *acc = match spare {
        0 => 0,
        n => window & ((1_u64 << n) - 1),
    };
}
586
/// Wraps `msg` in `Err`. Marked cold and never inlined so the error returns
/// stay out of the decoder's hot loop.
#[cold]
#[inline(never)]
fn decode_err<T>(msg: &'static str) -> Result<T, &'static str> {
    Result::Err(msg)
}
592
593/// Decodes a Base122 encoded string back into its original binary form.
594///
595/// # Errors
596///
597/// Returns an `Err` if the input string is not a valid Base122 sequence.
598/// This can happen if:
599/// *   It contains invalid UTF-8 lead bytes not conforming to Base122 escape patterns.
600/// *   An escape sequence is malformed or truncated.
601/// *   The illegal character index is out of the defined 0-6 range.
602///
603/// # Performance
604///
605/// Decoding is optimized via unaligned 64-bit reads and a lookup-table-based
606/// state machine to quickly process 7-bit groups.
607///
608/// # Examples
609///
610/// ```rust
611/// let data = b"data";
612/// let encoded = base122_fast::encode(data);
613/// let decoded = base122_fast::decode(&encoded).unwrap();
614///
615/// assert_eq!(data, &decoded[..]);
616/// ```
617pub fn decode(encoded: &str) -> Result<Vec<u8>, &'static str> {
618    if encoded.is_empty() {
619        return Ok(Vec::new());
620    }
621
622    let bytes = encoded.as_bytes();
623    let len = bytes.len();
624    let mut out = Vec::<u8>::with_capacity(decoded_capacity(len));
625    let out_ptr = out.as_mut_ptr();
626
627    let ptr = bytes.as_ptr();
628    let mut out_pos = 0usize;
629    let mut acc = 0u64;
630    let mut acc_bits = 0u32;
631    let mut i = 0usize;
632
633    while i < len {
634        while i + 16 <= len {
635            let c0 = unsafe { load_u64_le(ptr.add(i)) };
636            let c1 = unsafe { load_u64_le(ptr.add(i + 8)) };
637
638            if ((c0 | c1) & ASCII_MASK_8) != 0 {
639                break;
640            }
641
642            unsafe {
643                unpack8groups_chunk_le(c0, out_ptr, &mut out_pos, &mut acc, &mut acc_bits);
644                unpack8groups_chunk_le(c1, out_ptr, &mut out_pos, &mut acc, &mut acc_bits);
645            }
646            i += 16;
647        }
648
649        while i + 8 <= len {
650            let chunk = unsafe { load_u64_le(ptr.add(i)) };
651            let high = chunk & ASCII_MASK_8;
652
653            if high == 0 {
654                unsafe {
655                    unpack8groups_chunk_le(chunk, out_ptr, &mut out_pos, &mut acc, &mut acc_bits);
656                }
657                i += 8;
658                continue;
659            }
660
661            let ascii_prefix = (high.trailing_zeros() >> 3) as usize;
662            if ascii_prefix != 0 {
663                unsafe {
664                    push_ascii_prefix_le(
665                        chunk,
666                        ascii_prefix,
667                        out_ptr,
668                        &mut out_pos,
669                        &mut acc,
670                        &mut acc_bits,
671                    );
672                }
673                i += ascii_prefix;
674                continue;
675            }
676
677            break;
678        }
679
680        while i < len {
681            let b = unsafe { *ptr.add(i) };
682            if b >= 128 {
683                break;
684            }
685            i += 1;
686            unsafe { push7_scalar(out_ptr, &mut out_pos, &mut acc, &mut acc_bits, b) };
687        }
688
689        if i >= len {
690            break;
691        }
692
693        let b1 = unsafe { *ptr.add(i) };
694        let code = LEAD_DECODE[b1 as usize];
695        if code == 0xFF {
696            return decode_err("Invalid lead byte");
697        }
698
699        if i + 1 >= len {
700            return decode_err("Unexpected end of input");
701        }
702
703        let b2 = unsafe { *ptr.add(i + 1) };
704        if (b2 & 0xC0) != 0x80 {
705            return decode_err("Invalid continuation byte");
706        }
707
708        i += 2;
709
710        let illegal_index = code >> 1;
711        let first_bit = code & 1;
712        let next = (first_bit << 6) | (b2 & 0x3F);
713
714        unsafe {
715            if illegal_index < 6 {
716                push14_scalar(
717                    out_ptr,
718                    &mut out_pos,
719                    &mut acc,
720                    &mut acc_bits,
721                    ILLEGALS[illegal_index as usize],
722                    next,
723                );
724            } else {
725                debug_assert_eq!(illegal_index, SHORTENED);
726                push7_scalar(out_ptr, &mut out_pos, &mut acc, &mut acc_bits, next);
727            }
728        }
729    }
730
731    unsafe { out.set_len(out_pos) };
732    Ok(out)
733}
734
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::vec;

    /// Encodes `data`, decodes the result and checks that it matches.
    fn assert_roundtrip(data: &[u8]) {
        let enc = encode(data);
        let dec = decode(&enc).expect("decoding failed");
        assert_eq!(dec, data);
    }

    #[test]
    fn test_empty() {
        assert_eq!(encode(b""), "");
        assert_eq!(decode("").unwrap(), b"");
    }

    #[test]
    fn test_hello_world() {
        assert_roundtrip(b"hello world");
    }

    #[test]
    fn test_single_byte_values() {
        for b in 0..=255u8 {
            let data = vec![b];
            let enc = encode(&data);
            // Build the panic message lazily instead of formatting a string
            // on every successful iteration.
            let dec = decode(&enc)
                .unwrap_or_else(|e| panic!("decoding failed for byte {}: {}", b, e));
            assert_eq!(dec, data, "failed for byte {}", b);
        }
    }

    #[test]
    fn test_various_lengths_roundtrip() {
        for len in [
            0, 1, 2, 3, 6, 7, 8, 9, 14, 15, 16, 17, 31, 32, 33, 100, 255, 256, 511, 512,
        ] {
            let data: Vec<u8> = (0..len).map(|i| (i % 251) as u8).collect();
            let enc = encode(&data);
            let dec = decode(&enc).expect("decoding failed");
            assert_eq!(dec, data, "roundtrip failed for length {}", len);
        }
    }

    #[test]
    fn test_all_illegal_bytes_handling() {
        assert_roundtrip(b"\x00\x0A\x0D\x22\x26\x5C");
    }

    #[test]
    fn test_mixed_content() {
        let data: Vec<u8> = (0..=255).collect();
        assert_roundtrip(&data);
    }

    #[test]
    fn test_repeated_illegal_bytes() {
        assert_roundtrip(&vec![0u8; 100]);
    }

    /// Lead bytes that are valid UTF-8 but not assigned by this format must
    /// be rejected. These inputs are ordinary `&str` values.
    #[test]
    fn test_decode_rejects_unassigned_lead_in_valid_utf8() {
        // U+20AC starts with 0xE2, a three-byte lead.
        assert!(decode("\u{20AC}").is_err());
        // U+0680 is 0xDA 0x80: a two-byte lead carrying the unused marker 6.
        assert!(decode("\u{0680}").is_err());
    }

    // NOTE(review): the three tests below build strings from bytes that are
    // not valid UTF-8 via `from_utf8_unchecked` in order to reach the
    // decoder's defensive checks. That violates the documented contract of
    // `from_utf8_unchecked`; kept as-is because `decode` only accepts `&str`.

    #[test]
    fn test_decode_invalid_lead_byte() {
        let invalid = vec![0x80u8];
        let s = unsafe { String::from_utf8_unchecked(invalid) };
        assert!(decode(&s).is_err());
        let invalid2 = vec![0xFFu8];
        let s2 = unsafe { String::from_utf8_unchecked(invalid2) };
        assert!(decode(&s2).is_err());
    }

    #[test]
    fn test_decode_truncated_escape() {
        let mut data = vec![0xC0u8];
        let s = unsafe { String::from_utf8_unchecked(data.clone()) };
        assert!(decode(&s).is_err());
        data.push(0x40);
        let s2 = unsafe { String::from_utf8_unchecked(data) };
        assert!(decode(&s2).is_err());
    }

    #[test]
    fn test_decode_invalid_continuation_byte() {
        let data = vec![0xC2u8, 0xFF];
        let s = unsafe { String::from_utf8_unchecked(data) };
        assert!(decode(&s).is_err());
    }

    #[test]
    fn test_shortened_at_end() {
        assert_roundtrip(&[0u8]);
    }

    /// Round-trips 5 MB of pseudo-random bytes. The generator is a fixed-seed
    /// xorshift64* so a failure reproduces on every run and the test needs no
    /// external crate.
    #[test]
    fn test_very_long_input() {
        const DATA_SIZE: usize = 5_000_000;
        let mut state = 0x9E37_79B9_7F4A_7C15u64;
        let data: Vec<u8> = (0..DATA_SIZE)
            .map(|_| {
                state ^= state >> 12;
                state ^= state << 25;
                state ^= state >> 27;
                (state.wrapping_mul(0x2545_F491_4F6C_DD1D) >> 56) as u8
            })
            .collect();
        let enc = encode(&data);
        let dec = decode(&enc).expect("decode failed");
        assert_eq!(dec, data);
    }
}