qr_base44/
lib.rs

1//! qr-base44: Base44 encoder/decoder for arbitrary bytes using URL-safe QR-compatible alphabet.
2//! - Encoding groups: 2 bytes -> 3 chars; 1 byte -> 2 chars.
3//! - Alphabet: "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ$%*+-./:" (44 chars, excludes space only)
4//! - Public API encodes &[u8] -> String and decodes &str -> Vec<u8>.
5
6use num_bigint::BigUint;
7use num_traits::{One, Zero};
8
9#[derive(Debug, thiserror::Error)]
10pub enum Base44Error {
11    #[error("invalid base44 character")]
12    InvalidChar,
13    #[error("dangling character group")]
14    Dangling,
15    #[error("value overflow")]
16    Overflow,
17}
18
/// Base44 alphabet: URL-safe QR-compatible subset (excludes space only).
///
/// The byte at index `i` is the character that represents digit value `i`,
/// so the table can be indexed directly with a digit in `0..44`.
pub const BASE44_ALPHABET: &[u8; 44] = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ$%*+-./:";
21
/// Translate one alphabet byte into its digit value (`0..=43`).
///
/// Returns `None` for every byte that is not part of the Base44 alphabet.
#[inline]
fn b44_val(ch: u8) -> Option<u16> {
    let digit = match ch {
        // Decimal digits occupy values 0..=9.
        b'0'..=b'9' => ch - b'0',
        // Upper-case letters follow at 10..=35.
        b'A'..=b'Z' => ch - b'A' + 10,
        // The eight punctuation symbols take the remaining slots.
        b'$' => 36,
        b'%' => 37,
        b'*' => 38,
        b'+' => 39,
        b'-' => 40,
        b'.' => 41,
        b'/' => 42,
        b':' => 43,
        _ => return None,
    };
    Some(u16::from(digit))
}
38
39/// Encode arbitrary bytes into a Base44 string.
40/// Groups of 2 bytes produce 3 characters; a final single byte produces 2 characters.
41pub fn encode(input: &[u8]) -> String {
42    let mut out = String::with_capacity((input.len() * 3).div_ceil(2));
43    let mut i = 0;
44    while i + 1 < input.len() {
45        let x = (input[i] as u16) * 256 + (input[i + 1] as u16);
46        let c = x % 44; // least significant digit
47        let x = x / 44;
48        let b = x % 44;
49        let a = x / 44; // most significant digit
50        // Base44 outputs least-significant digit first
51        out.push(BASE44_ALPHABET[c as usize] as char);
52        out.push(BASE44_ALPHABET[b as usize] as char);
53        out.push(BASE44_ALPHABET[a as usize] as char);
54        i += 2;
55    }
56    if i < input.len() {
57        let x = input[i] as u16;
58        let b = x % 44;
59        let a = x / 44;
60        // Base44 outputs least-significant digit first for single byte too
61        out.push(BASE44_ALPHABET[b as usize] as char);
62        out.push(BASE44_ALPHABET[a as usize] as char);
63    }
64    out
65}
66
67/// Decode a Base44 string back to raw bytes.
68/// Accepts only the Base44 alphabet; returns errors for invalid chars, dangling final char, or overflow.
69pub fn decode(s: &str) -> Result<Vec<u8>, Base44Error> {
70    let bytes = s.as_bytes();
71    let mut out: Vec<u8> = Vec::with_capacity(bytes.len());
72    let mut i = 0;
73    while i + 2 < bytes.len() {
74        // Input is least-significant digit first: c (lsd), b, a (msd)
75        let c0 = b44_val(bytes[i]).ok_or(Base44Error::InvalidChar)? as u32;
76        let c1 = b44_val(bytes[i + 1]).ok_or(Base44Error::InvalidChar)? as u32;
77        let c2 = b44_val(bytes[i + 2]).ok_or(Base44Error::InvalidChar)? as u32;
78        let x: u32 = c2 * 44 * 44 + c1 * 44 + c0; // 0..(44^3 - 1)
79        if x > 65535 {
80            return Err(Base44Error::Overflow);
81        }
82        out.push((x / 256) as u8);
83        out.push((x % 256) as u8);
84        i += 3;
85    }
86    if i < bytes.len() {
87        if i + 1 >= bytes.len() {
88            // Single trailing character: report InvalidChar if it's not in alphabet, otherwise Dangling
89            if b44_val(bytes[i]).is_none() {
90                return Err(Base44Error::InvalidChar);
91            }
92            return Err(Base44Error::Dangling);
93        }
94        let c0 = b44_val(bytes[i]).ok_or(Base44Error::InvalidChar)? as u32;
95        let c1 = b44_val(bytes[i + 1]).ok_or(Base44Error::InvalidChar)? as u32;
96        let x: u32 = c1 * 44 + c0; // 0..(44^2 - 1)
97        if x > 255 {
98            return Err(Base44Error::Overflow);
99        }
100        out.push(x as u8);
101    }
102    Ok(out)
103}
104
105/// Encode a fixed number of bits (arbitrary length) as a Base44 string with optimal length.
106///
107/// This function treats the input bytes as a big integer containing exactly `bits` bits
108/// and encodes it using the minimum number of Base44 characters required.
109///
110/// # Optimal Encoding
111///
112/// For N bits, the optimal Base44 length is `ceil(N * log(2) / log(44))`:
113/// - 103 bits → 19 chars (2^103 < 44^19)
114/// - 104 bits → 20 chars (2^104 < 44^20)
115/// - 256 bits → 47 chars (2^256 < 44^47)
116///
117/// This is more efficient than byte-pair encoding when the bit count doesn't align
118/// with byte boundaries, saving up to 5% space for certain bit lengths.
119///
120/// # Performance Optimization
121///
122/// For small bit counts, native integer types are used for better performance:
123/// - bits ≤ 64: uses u64 (fastest)
124/// - bits ≤ 128: uses u128 (fast)
125/// - bits > 128: uses BigUint (fallback)
126///
127/// # Arguments
128///
129/// * `bits` - Number of significant bits (must be > 0). Bytes are read in little-endian order.
130/// * `bytes` - Input bytes in LSB-first order (matching typical bit-packing schemes).
131///
132/// # Example
133///
134/// ```
135/// // Encode 103 bits (13 bytes with top byte using 7 bits)
136/// let data = [0u8; 13];
137/// let encoded = qr_base44::encode_bits(103, &data);
138/// assert_eq!(encoded.len(), 19); // Optimal length for 103 bits
139/// ```
140pub fn encode_bits(bits: usize, bytes: &[u8]) -> String {
141    assert!(bits > 0, "bits must be > 0");
142    let expected_bytes = bits.div_ceil(8);
143    assert!(
144        bytes.len() >= expected_bytes,
145        "Need at least {} bytes for {} bits, got {}",
146        expected_bytes,
147        bits,
148        bytes.len()
149    );
150
151    // Use optimized paths for common bit sizes
152    if bits <= 64 {
153        encode_bits_u64(bits, bytes)
154    } else if bits <= 128 {
155        encode_bits_u128(bits, bytes)
156    } else {
157        encode_bits_bigint(bits, bytes)
158    }
159}
160
161/// Fast path for bits <= 64 using u64
162#[inline]
163fn encode_bits_u64(bits: usize, bytes: &[u8]) -> String {
164    let expected_bytes = bits.div_ceil(8);
165    let mut value = 0u64;
166    for (i, &b) in bytes.iter().take(expected_bytes).enumerate() {
167        value |= (b as u64) << (i * 8);
168    }
169
170    let chars_needed = ((bits as f64) * 2f64.ln() / 44f64.ln()).ceil() as usize;
171    let mut result = Vec::with_capacity(chars_needed);
172
173    for _ in 0..chars_needed {
174        let digit = (value % 44) as usize;
175        result.push(BASE44_ALPHABET[digit]);
176        value /= 44;
177    }
178
179    result.reverse();
180    String::from_utf8(result).unwrap()
181}
182
183/// Fast path for bits <= 128 using u128
184#[inline]
185fn encode_bits_u128(bits: usize, bytes: &[u8]) -> String {
186    let expected_bytes = bits.div_ceil(8);
187    let mut value = 0u128;
188    for (i, &b) in bytes.iter().take(expected_bytes).enumerate() {
189        value |= (b as u128) << (i * 8);
190    }
191
192    let chars_needed = ((bits as f64) * 2f64.ln() / 44f64.ln()).ceil() as usize;
193    let mut result = Vec::with_capacity(chars_needed);
194
195    for _ in 0..chars_needed {
196        let digit = (value % 44) as usize;
197        result.push(BASE44_ALPHABET[digit]);
198        value /= 44;
199    }
200
201    result.reverse();
202    String::from_utf8(result).unwrap()
203}
204
205/// Fallback path for bits > 128 using BigUint
206fn encode_bits_bigint(bits: usize, bytes: &[u8]) -> String {
207    let expected_bytes = bits.div_ceil(8);
208
209    // Convert bytes to BigUint (little-endian)
210    let mut value = BigUint::zero();
211    for (i, &b) in bytes.iter().take(expected_bytes).enumerate() {
212        value += BigUint::from(b) << (i * 8);
213    }
214
215    // Calculate optimal character count: ceil(bits * log(2) / log(44))
216    let chars_needed = ((bits as f64) * 2f64.ln() / 44f64.ln()).ceil() as usize;
217
218    // Convert to base44
219    let mut result = Vec::with_capacity(chars_needed);
220    let forty_four = BigUint::from(44u32);
221    let mut v = value;
222
223    for _ in 0..chars_needed {
224        let digit = (&v % &forty_four).to_u32_digits();
225        let d = if digit.is_empty() {
226            0
227        } else {
228            digit[0] as usize
229        };
230        result.push(BASE44_ALPHABET[d]);
231        v /= &forty_four;
232    }
233
234    // Reverse to get most significant digit first
235    result.reverse();
236    String::from_utf8(result).unwrap()
237}
238
239/// Decode a Base44 string back to bytes, expecting a specific bit count.
240///
241/// This is the inverse of [`encode_bits`]. The output bytes are in little-endian order
242/// (LSB-first), matching typical bit-packing schemes.
243///
244/// # Performance Optimization
245///
246/// For small bit counts, native integer types are used for better performance:
247/// - bits ≤ 64: uses u64 (fastest)
248/// - bits ≤ 128: uses u128 (fast)
249/// - bits > 128: uses BigUint (fallback)
250///
251/// # Arguments
252///
253/// * `bits` - Expected number of significant bits (must be > 0)
254/// * `s` - Base44 string to decode
255///
256/// # Returns
257///
258/// A vector of bytes in LSB-first order containing exactly `ceil(bits / 8)` bytes.
259/// Returns an error if the string contains invalid characters or the decoded value
260/// exceeds the specified bit count.
261///
262/// # Example
263///
264/// ```
265/// let encoded = qr_base44::encode_bits(103, &[0u8; 13]);
266/// let decoded = qr_base44::decode_bits(103, &encoded).unwrap();
267/// assert_eq!(decoded.len(), 13);
268/// ```
269pub fn decode_bits(bits: usize, s: &str) -> Result<Vec<u8>, Base44Error> {
270    assert!(bits > 0, "bits must be > 0");
271
272    // Use optimized paths for common bit sizes
273    if bits <= 64 {
274        decode_bits_u64(bits, s)
275    } else if bits <= 128 {
276        decode_bits_u128(bits, s)
277    } else {
278        decode_bits_bigint(bits, s)
279    }
280}
281
282/// Fast path for bits <= 64 using u64
283#[inline]
284fn decode_bits_u64(bits: usize, s: &str) -> Result<Vec<u8>, Base44Error> {
285    let mut value = 0u64;
286
287    for ch in s.chars() {
288        let digit = b44_val(ch as u8).ok_or(Base44Error::InvalidChar)?;
289        value = value
290            .checked_mul(44)
291            .and_then(|v| v.checked_add(digit as u64))
292            .ok_or(Base44Error::Overflow)?;
293    }
294
295    // Verify value fits in specified bits
296    if bits < 64 {
297        let max_value = (1u64 << bits) - 1;
298        if value > max_value {
299            return Err(Base44Error::Overflow);
300        }
301    }
302
303    // Convert to bytes (little-endian)
304    let byte_count = bits.div_ceil(8);
305    let mut bytes = vec![0u8; byte_count];
306    for (i, byte) in bytes.iter_mut().enumerate().take(byte_count) {
307        *byte = (value >> (i * 8)) as u8;
308    }
309
310    Ok(bytes)
311}
312
313/// Fast path for bits <= 128 using u128
314#[inline]
315fn decode_bits_u128(bits: usize, s: &str) -> Result<Vec<u8>, Base44Error> {
316    let mut value = 0u128;
317
318    for ch in s.chars() {
319        let digit = b44_val(ch as u8).ok_or(Base44Error::InvalidChar)?;
320        value = value
321            .checked_mul(44)
322            .and_then(|v| v.checked_add(digit as u128))
323            .ok_or(Base44Error::Overflow)?;
324    }
325
326    // Verify value fits in specified bits
327    if bits < 128 {
328        let max_value = (1u128 << bits) - 1;
329        if value > max_value {
330            return Err(Base44Error::Overflow);
331        }
332    }
333
334    // Convert to bytes (little-endian)
335    let byte_count = bits.div_ceil(8);
336    let mut bytes = vec![0u8; byte_count];
337    for (i, byte) in bytes.iter_mut().enumerate().take(byte_count) {
338        *byte = (value >> (i * 8)) as u8;
339    }
340
341    Ok(bytes)
342}
343
344/// Fallback path for bits > 128 using BigUint
345fn decode_bits_bigint(bits: usize, s: &str) -> Result<Vec<u8>, Base44Error> {
346    // Convert base44 string to BigUint
347    let mut value = BigUint::zero();
348    let forty_four = BigUint::from(44u32);
349
350    for ch in s.chars() {
351        let digit = b44_val(ch as u8).ok_or(Base44Error::InvalidChar)?;
352        value = value * &forty_four + BigUint::from(digit as u32);
353    }
354
355    // Verify value fits in specified bits
356    let max_value = if bits < usize::MAX {
357        (BigUint::one() << bits) - BigUint::one()
358    } else {
359        // For very large bit counts, skip overflow check
360        value.clone()
361    };
362
363    if value > max_value {
364        return Err(Base44Error::Overflow);
365    }
366
367    // Convert to bytes (little-endian)
368    let byte_count = bits.div_ceil(8);
369    let mut bytes = vec![0u8; byte_count];
370    let value_bytes = value.to_bytes_le();
371
372    for (i, &b) in value_bytes.iter().enumerate() {
373        if i < byte_count {
374            bytes[i] = b;
375        }
376    }
377
378    Ok(bytes)
379}
380
// Unit tests for the byte-pair codec (`encode` / `decode`), the optimal-length
// bit codec (`encode_bits` / `decode_bits`), and the alphabet tables.
#[cfg(test)]
mod tests {
    use super::*;

    // encode followed by decode returns the original bytes for a mix of lengths.
    #[test]
    fn roundtrips() {
        let cases: &[&[u8]] = &[
            b"",
            b"A",
            b"AB",
            b"Hello, world!",
            &[0x00],
            &[0x00, 0x01, 0xFF, 0x80, 0x7F],
        ];
        for &case in cases {
            let s = encode(case);
            let dec = decode(&s).unwrap();
            assert_eq!(case, dec.as_slice());
        }
    }

    // Hand-computed encodings pin the digit order inside each group.
    #[test]
    fn known_vectors() {
        // Base44 uses least-significant digit first (lsd-first): output order is c, b, a.
        // For a 2-byte group [u, v], form x = u*256 + v, then:
        // c = x % 44; x /= 44; b = x % 44; a = x / 44; and output chars are [c, b, a].
        // For a 1-byte group [u], b = u % 44; a = u / 44; and output chars are [b, a].
        // Edge cases at boundaries
        // [0x00, 0x00] -> x = 0; digits: c=0, b=0, a=0; output lsd-first -> "000"
        assert_eq!(encode(&[0x00, 0x00]), "000");

        // Test single byte encoding
        // [0x41] (ASCII 'A' = 65) -> b = 65 % 44 = 21 (L), a = 65 / 44 = 1 (1) -> "L1"
        assert_eq!(encode(&[0x41]), "L1");

        // Test two byte encoding
        // [0x00, 0x01] -> x = 1; c = 1 % 44 = 1, x = 0, b = 0, a = 0 -> "100"
        assert_eq!(encode(&[0x00, 0x01]), "100");

        // Verify decoding matches
        assert_eq!(decode("000").unwrap(), &[0x00, 0x00]);
        assert_eq!(decode("L1").unwrap(), &[0x41]);
        assert_eq!(decode("100").unwrap(), &[0x00, 0x01]);
    }

    // Each error variant of `decode` is reachable.
    #[test]
    fn errors() {
        // Error categories under test:
        // - InvalidChar: character not in Base44 alphabet
        // - Dangling: incomplete group (e.g., single trailing valid character)
        // - Overflow: numeric value exceeds maximum for the group
        // Invalid characters and structural errors
        assert!(matches!(decode("\t"), Err(Base44Error::InvalidChar))); // '\t' not in Base44 alphabet
        assert!(matches!(decode("\n"), Err(Base44Error::InvalidChar))); // '\n' not in Base44 alphabet
        assert!(matches!(decode(" "), Err(Base44Error::InvalidChar))); // space removed from Base44
        // Overflow cases
        // 3-char group with max digits -> value > 65535
        assert!(matches!(decode(":::"), Err(Base44Error::Overflow))); // ':::' -> 43*44^2 + 43*44 + 43 = 85183 > 65535
        // 2-char group producing >255
        assert!(matches!(decode("//"), Err(Base44Error::Overflow))); // '//' -> 42*44 + 42 = 1890 > 255

        assert!(matches!(decode("A"), Err(Base44Error::Dangling))); // single valid char -> incomplete group
        assert!(matches!(decode("😀"), Err(Base44Error::InvalidChar))); // not in Base44 alphabet
    }

    // Largest representable groups, every alphabet character, and empty input.
    #[test]
    fn boundary_cases() {
        // Test maximum valid values for 2-char encoding (single byte)
        // Max single byte: 255
        // 255 = 5*44 + 35, so encoding should be alphabet[35] + alphabet[5] = "Z5"
        assert_eq!(encode(&[0xFF]), "Z5");
        assert_eq!(decode("Z5").unwrap(), &[0xFF]);

        // Test maximum valid 2-byte value: [0xFF, 0xFF]
        // x = 255*256 + 255 = 65535
        // c = 65535 % 44 = 19 (J), x = 1489
        // b = 1489 % 44 = 37 (%), a = 1489 / 44 = 33 (X)
        assert_eq!(encode(&[0xFF, 0xFF]), "J%X");
        assert_eq!(decode("J%X").unwrap(), &[0xFF, 0xFF]);

        // Test all alphabet characters are valid for decoding
        let alphabet = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ$%*+-./:";
        for (idx, ch) in alphabet.chars().enumerate() {
            // For positions 0-33 (0-9, A-X), can safely use "00{ch}" without overflow
            // Position 34 (Y) onwards: 34*44^2 = 65824 > 65535, so use "{ch}0" format
            if idx < 34 {
                let s = format!("00{ch}");
                decode(&s)
                    .unwrap_or_else(|_| panic!("Character {ch} should be valid in 3-char group"));
            } else {
                // For chars that would overflow in "00{ch}" format, use "{ch}0" (value < 255)
                let s = format!("{ch}0");
                decode(&s).unwrap_or_else(|_| panic!("Character {ch} should be valid"));
            }
        }

        // Test empty input
        assert_eq!(encode(&[]), "");
        assert_eq!(decode("").unwrap(), Vec::<u8>::new());

        // Test mixed length data (odd number of bytes)
        let data = &[0x01, 0x02, 0x03];
        let encoded = encode(data);
        assert_eq!(decode(&encoded).unwrap(), data);
    }

    // Encoded output never leaves the alphabet and never contains a space.
    #[test]
    fn url_safe_characters() {
        // Verify that encoded output contains no URL-problematic characters
        // (no space, which was removed from Base45)
        let test_data = &[
            &[0x00][..],
            &[0xFF],
            &[0x00, 0xFF],
            &[0x12, 0x34, 0x56, 0x78, 0x9A, 0xBC, 0xDE, 0xF0],
        ];

        for data in test_data {
            let encoded = encode(data);
            assert!(!encoded.contains(' '), "Encoded should not contain space");
            // Verify all chars are in our alphabet
            for ch in encoded.chars() {
                assert!(
                    BASE44_ALPHABET.contains(&(ch as u8)),
                    "Character {ch} not in alphabet"
                );
            }
        }
    }

    // The largest 103-bit value encodes to 19 characters and round-trips.
    #[test]
    fn optimal_bit_encoding_103() {
        // Test optimal encoding for 103 bits (common use case: UUID compression)
        // 2^103 < 44^19, so 103 bits should encode to exactly 19 characters
        let mut data = [0xFFu8; 13];
        data[12] = 0x7F; // Only 7 bits in last byte for 103 total bits
        let encoded = encode_bits(103, &data);
        assert_eq!(encoded.len(), 19, "103 bits should encode to 19 chars");

        let decoded = decode_bits(103, &encoded).unwrap();
        assert_eq!(decoded, data.to_vec(), "Roundtrip should preserve data");
    }

    // Round-trips across the u64 and u128 paths, comparing only significant bits.
    #[test]
    fn optimal_bit_encoding_roundtrip() {
        // Test various bit lengths for roundtrip accuracy
        let test_cases = vec![
            (8, vec![0x42]),
            (16, vec![0x12, 0x34]),
            (24, vec![0xAB, 0xCD, 0xEF]),
            (
                103,
                vec![
                    0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D,
                ],
            ),
            (128, vec![0xFF; 16]),
        ];

        for (bits, data) in test_cases {
            let encoded = encode_bits(bits, &data);
            let decoded = decode_bits(bits, &encoded).unwrap();

            // Compare only the relevant bits
            let byte_count = bits.div_ceil(8);
            assert_eq!(decoded.len(), byte_count);

            // Verify data matches (may need to mask last byte)
            for i in 0..byte_count {
                if i == byte_count - 1 && bits % 8 != 0 {
                    let mask = (1u8 << (bits % 8)) - 1;
                    assert_eq!(decoded[i] & mask, data[i] & mask);
                } else {
                    assert_eq!(decoded[i], data[i]);
                }
            }
        }
    }

    // For 103 bits the bit codec is one character shorter than the byte-pair codec.
    #[test]
    fn optimal_vs_byte_pair_comparison() {
        // Compare optimal bit encoding vs byte-pair encoding for 103 bits
        let data = [
            0x12, 0x34, 0x56, 0x78, 0x9A, 0xBC, 0xDE, 0xF0, 0x11, 0x22, 0x33, 0x44, 0x55,
        ];

        let optimal = encode_bits(103, &data);
        let byte_pair = encode(&data);

        // Optimal should be 19 chars, byte-pair should be 20 chars
        assert_eq!(optimal.len(), 19);
        assert_eq!(byte_pair.len(), 20);

        println!(
            "103 bits: optimal={} chars, byte-pair={} chars, savings={}%",
            optimal.len(),
            byte_pair.len(),
            (byte_pair.len() - optimal.len()) * 100 / byte_pair.len()
        );
    }

    // All-zero input, a single bit, and the maximum 103-bit value.
    #[test]
    fn optimal_bit_encoding_edge_cases() {
        // Test edge cases

        // All zeros
        let zeros = vec![0u8; 13];
        let encoded_zeros = encode_bits(103, &zeros);
        assert_eq!(encoded_zeros.len(), 19);
        let decoded_zeros = decode_bits(103, &encoded_zeros).unwrap();
        assert_eq!(decoded_zeros, zeros);

        // Single bit
        let one_bit = vec![0x01];
        let encoded_one = encode_bits(1, &one_bit);
        let decoded_one = decode_bits(1, &encoded_one).unwrap();
        assert_eq!(decoded_one[0] & 0x01, 1);

        // Maximum value for 103 bits
        let mut max_103 = vec![0xFFu8; 13];
        max_103[12] = 0x7F; // Only 7 bits in last byte
        let encoded_max = encode_bits(103, &max_103);
        let decoded_max = decode_bits(103, &encoded_max).unwrap();
        assert_eq!(decoded_max, max_103);
    }

    // Widths above 128 bits exercise the BigUint fallback.
    #[test]
    fn large_bit_counts() {
        // Test 256 bits (SHA-256 hash size)
        let data_256 = vec![0x42u8; 32]; // 256 bits
        let encoded_256 = encode_bits(256, &data_256);
        // 256 bits should encode to ceil(256 * ln(2) / ln(44)) = 47 chars
        assert_eq!(encoded_256.len(), 47);
        let decoded_256 = decode_bits(256, &encoded_256).unwrap();
        assert_eq!(decoded_256, data_256);

        // Test 512 bits
        let data_512 = vec![0xABu8; 64]; // 512 bits
        let encoded_512 = encode_bits(512, &data_512);
        // 512 bits should encode to ceil(512 * ln(2) / ln(44)) = 94 chars
        assert_eq!(encoded_512.len(), 94);
        let decoded_512 = decode_bits(512, &encoded_512).unwrap();
        assert_eq!(decoded_512, data_512);

        // Test 1024 bits
        let data_1024 = vec![0x11u8; 128]; // 1024 bits
        let encoded_1024 = encode_bits(1024, &data_1024);
        let decoded_1024 = decode_bits(1024, &encoded_1024).unwrap();
        assert_eq!(decoded_1024, data_1024);
    }

    // 63 and 64 bits sit on either side of the u64 shift/mask special case.
    #[test]
    fn optimized_paths_u64_boundary() {
        // Test boundary between u64 and u128 paths (64 bits)
        let data_64 = vec![0xFFu8; 8]; // 64 bits
        let encoded = encode_bits(64, &data_64);
        let decoded = decode_bits(64, &encoded).unwrap();
        assert_eq!(decoded, data_64);

        // Test just below boundary (63 bits)
        let mut data_63 = vec![0xFFu8; 8];
        data_63[7] = 0x7F; // Only 7 bits in last byte
        let encoded = encode_bits(63, &data_63);
        let decoded = decode_bits(63, &encoded).unwrap();
        assert_eq!(decoded[7] & 0x7F, data_63[7]);
    }

    // 128 bits is the last width on the u128 path; 129 is the first on BigUint.
    #[test]
    fn optimized_paths_u128_boundary() {
        // Test boundary between u128 and BigUint paths (128 bits)
        let data_128 = vec![0xFFu8; 16]; // 128 bits
        let encoded = encode_bits(128, &data_128);
        let decoded = decode_bits(128, &encoded).unwrap();
        assert_eq!(decoded, data_128);

        // Test just above boundary (129 bits) - should use BigUint
        let mut data_129 = vec![0xFFu8; 17];
        data_129[16] = 0x01; // Only 1 bit in last byte for 129 total bits
        let encoded = encode_bits(129, &data_129);
        let decoded = decode_bits(129, &encoded).unwrap();
        assert_eq!(decoded[16] & 0x01, data_129[16]);
    }

    // Sub-byte widths and a couple of mid-sized widths on the u64 path.
    #[test]
    fn optimized_paths_small_bits() {
        // Test small bit counts to ensure u64 path works correctly
        // 1-7 bits (fits in 1 byte)
        for bits in 1..=7 {
            let mask = (1u8 << bits) - 1;
            let data = vec![0x7Fu8 & mask]; // Only use bits that fit
            let encoded = encode_bits(bits, &data);
            let decoded = decode_bits(bits, &encoded).unwrap();
            assert_eq!(decoded[0] & mask, data[0] & mask, "Failed for {bits} bits");
        }

        // 32 bits (u64 path)
        let data_32 = vec![0x12, 0x34, 0x56, 0x78];
        let encoded_32 = encode_bits(32, &data_32);
        let decoded_32 = decode_bits(32, &encoded_32).unwrap();
        assert_eq!(decoded_32, data_32);

        // 48 bits (u64 path)
        let data_48 = vec![0xFF; 6];
        let encoded_48 = encode_bits(48, &data_48);
        let decoded_48 = decode_bits(48, &encoded_48).unwrap();
        assert_eq!(decoded_48, data_48);
    }

    // Despite the name, this exercises the error paths of `decode_bits`.
    #[test]
    fn encode_bits_error_cases() {
        // Test decode_bits error handling for invalid characters
        assert!(matches!(
            decode_bits(8, "ABC😀"),
            Err(Base44Error::InvalidChar)
        ));

        assert!(matches!(
            decode_bits(8, "ABC "),
            Err(Base44Error::InvalidChar)
        ));

        // Test overflow detection in u64 path
        // For 8 bits, max value is 255, but we can try to decode a value > 255
        let large_value = "ZZZ"; // This represents a large value
        let result = decode_bits(8, large_value);
        assert!(matches!(result, Err(Base44Error::Overflow)));

        // Test overflow detection in u128 path (65 bits)
        let large_value_128 = "ZZZZZZZZZZZZZZ";
        let result = decode_bits(65, large_value_128);
        assert!(matches!(result, Err(Base44Error::Overflow)));
    }

    // Round-trips for a few distinctive byte patterns at 32, 64 and 128 bits.
    #[test]
    fn encode_bits_various_patterns() {
        // Test alternating bit patterns
        let alternating = vec![0xAA, 0x55, 0xAA, 0x55]; // 10101010 01010101 pattern
        let encoded = encode_bits(32, &alternating);
        let decoded = decode_bits(32, &encoded).unwrap();
        assert_eq!(decoded, alternating);

        // Test sequential bytes
        let sequential: Vec<u8> = (0..16).collect();
        let encoded = encode_bits(128, &sequential);
        let decoded = decode_bits(128, &encoded).unwrap();
        assert_eq!(decoded, sequential);

        // Test random-like data
        let random = vec![0x9E, 0x3D, 0x7B, 0x2F, 0xC8, 0x15, 0x64, 0xAA];
        let encoded = encode_bits(64, &random);
        let decoded = decode_bits(64, &encoded).unwrap();
        assert_eq!(decoded, random);
    }

    // Both codecs round-trip the same byte-aligned data through their own decoder.
    #[test]
    fn cross_verify_encode_methods() {
        // For byte-aligned data, verify encode_bits produces valid output
        // (though not necessarily identical to encode due to different algorithms)

        // 8 bits - single byte
        let data_8 = vec![0x42];
        let encoded_bits = encode_bits(8, &data_8);
        let decoded_bits = decode_bits(8, &encoded_bits).unwrap();
        assert_eq!(decoded_bits, data_8);

        // 16 bits - two bytes
        let data_16 = vec![0x12, 0x34];
        let encoded_bits = encode_bits(16, &data_16);
        let decoded_bits = decode_bits(16, &encoded_bits).unwrap();
        assert_eq!(decoded_bits, data_16);

        // Both methods should produce decodable results
        let data = vec![0xAB, 0xCD];
        let encoded_pair = encode(&data);
        let encoded_bits = encode_bits(16, &data);

        // Verify both can be decoded correctly
        assert_eq!(decode(&encoded_pair).unwrap(), data);
        assert_eq!(decode_bits(16, &encoded_bits).unwrap(), data);
    }

    // `BASE44_ALPHABET` and `b44_val` must describe the same mapping.
    #[test]
    fn alphabet_completeness() {
        // Verify all 44 characters in alphabet are unique
        let mut chars: Vec<u8> = BASE44_ALPHABET.to_vec();
        chars.sort();
        chars.dedup();
        assert_eq!(
            chars.len(),
            44,
            "Alphabet should have exactly 44 unique characters"
        );

        // Verify b44_val maps all alphabet chars correctly
        for (expected_val, &ch) in BASE44_ALPHABET.iter().enumerate() {
            let val = b44_val(ch);
            let ch_char = ch as char;
            assert_eq!(
                val,
                Some(expected_val as u16),
                "Character {ch_char} should map to value {expected_val}"
            );
        }

        // Verify b44_val rejects invalid characters
        for ch in [b' ', b'\t', b'\n', b'@', b'[', b'`', b'{'] {
            let ch_char = ch as char;
            assert_eq!(b44_val(ch), None, "Character {ch_char} should not be valid");
        }
    }
}