1use num_bigint::BigUint;
7use num_traits::{One, Zero};
8
/// Errors reported by the base44 decoding functions in this module.
#[derive(Debug, thiserror::Error)]
pub enum Base44Error {
    /// An input character could not be mapped to a base44 digit.
    #[error("invalid base44 character")]
    InvalidChar,
    /// Byte-pair input ended with a single leftover character, which cannot
    /// encode a whole byte.
    #[error("dangling character group")]
    Dangling,
    /// A decoded group or value is larger than the target width can hold.
    #[error("value overflow")]
    Overflow,
}
18
/// The 44 output symbols, indexed by digit value: `0`-`9` (0..=9),
/// `A`-`Z` (10..=35), then `$ % * + - . / :` (36..=43).
pub const BASE44_ALPHABET: &[u8; 44] = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ$%*+-./:";
21
/// Maps an ASCII byte to its base44 digit value.
///
/// Digits `0`-`9` map to 0..=9, uppercase `A`-`Z` to 10..=35 and the
/// punctuation symbols `$ % * + - . / :` to 36..=43, in that order.
/// Every other byte yields `None`.
#[inline]
fn b44_val(ch: u8) -> Option<u16> {
    // Punctuation symbols in digit order; the first one has value 36.
    const SYMBOLS: &[u8; 8] = b"$%*+-./:";

    let value = if ch.is_ascii_digit() {
        ch - b'0'
    } else if ch.is_ascii_uppercase() {
        ch - b'A' + 10
    } else {
        let offset = SYMBOLS.iter().position(|&symbol| symbol == ch)?;
        36 + offset as u8
    };
    Some(u16::from(value))
}
38
39pub fn encode(input: &[u8]) -> String {
42 let mut out = String::with_capacity((input.len() * 3).div_ceil(2));
43 let mut i = 0;
44 while i + 1 < input.len() {
45 let x = (input[i] as u16) * 256 + (input[i + 1] as u16);
46 let c = x % 44; let x = x / 44;
48 let b = x % 44;
49 let a = x / 44; out.push(BASE44_ALPHABET[c as usize] as char);
52 out.push(BASE44_ALPHABET[b as usize] as char);
53 out.push(BASE44_ALPHABET[a as usize] as char);
54 i += 2;
55 }
56 if i < input.len() {
57 let x = input[i] as u16;
58 let b = x % 44;
59 let a = x / 44;
60 out.push(BASE44_ALPHABET[b as usize] as char);
62 out.push(BASE44_ALPHABET[a as usize] as char);
63 }
64 out
65}
66
67pub fn decode(s: &str) -> Result<Vec<u8>, Base44Error> {
70 let bytes = s.as_bytes();
71 let mut out: Vec<u8> = Vec::with_capacity(bytes.len());
72 let mut i = 0;
73 while i + 2 < bytes.len() {
74 let c0 = b44_val(bytes[i]).ok_or(Base44Error::InvalidChar)? as u32;
76 let c1 = b44_val(bytes[i + 1]).ok_or(Base44Error::InvalidChar)? as u32;
77 let c2 = b44_val(bytes[i + 2]).ok_or(Base44Error::InvalidChar)? as u32;
78 let x: u32 = c2 * 44 * 44 + c1 * 44 + c0; if x > 65535 {
80 return Err(Base44Error::Overflow);
81 }
82 out.push((x / 256) as u8);
83 out.push((x % 256) as u8);
84 i += 3;
85 }
86 if i < bytes.len() {
87 if i + 1 >= bytes.len() {
88 if b44_val(bytes[i]).is_none() {
90 return Err(Base44Error::InvalidChar);
91 }
92 return Err(Base44Error::Dangling);
93 }
94 let c0 = b44_val(bytes[i]).ok_or(Base44Error::InvalidChar)? as u32;
95 let c1 = b44_val(bytes[i + 1]).ok_or(Base44Error::InvalidChar)? as u32;
96 let x: u32 = c1 * 44 + c0; if x > 255 {
98 return Err(Base44Error::Overflow);
99 }
100 out.push(x as u8);
101 }
102 Ok(out)
103}
104
105pub fn encode_bits(bits: usize, bytes: &[u8]) -> String {
141 assert!(bits > 0, "bits must be > 0");
142 let expected_bytes = bits.div_ceil(8);
143 assert!(
144 bytes.len() >= expected_bytes,
145 "Need at least {} bytes for {} bits, got {}",
146 expected_bytes,
147 bits,
148 bytes.len()
149 );
150
151 if bits <= 64 {
153 encode_bits_u64(bits, bytes)
154 } else if bits <= 128 {
155 encode_bits_u128(bits, bytes)
156 } else {
157 encode_bits_bigint(bits, bytes)
158 }
159}
160
161#[inline]
163fn encode_bits_u64(bits: usize, bytes: &[u8]) -> String {
164 let expected_bytes = bits.div_ceil(8);
165 let mut value = 0u64;
166 for (i, &b) in bytes.iter().take(expected_bytes).enumerate() {
167 value |= (b as u64) << (i * 8);
168 }
169
170 let chars_needed = ((bits as f64) * 2f64.ln() / 44f64.ln()).ceil() as usize;
171 let mut result = Vec::with_capacity(chars_needed);
172
173 for _ in 0..chars_needed {
174 let digit = (value % 44) as usize;
175 result.push(BASE44_ALPHABET[digit]);
176 value /= 44;
177 }
178
179 result.reverse();
180 String::from_utf8(result).unwrap()
181}
182
183#[inline]
185fn encode_bits_u128(bits: usize, bytes: &[u8]) -> String {
186 let expected_bytes = bits.div_ceil(8);
187 let mut value = 0u128;
188 for (i, &b) in bytes.iter().take(expected_bytes).enumerate() {
189 value |= (b as u128) << (i * 8);
190 }
191
192 let chars_needed = ((bits as f64) * 2f64.ln() / 44f64.ln()).ceil() as usize;
193 let mut result = Vec::with_capacity(chars_needed);
194
195 for _ in 0..chars_needed {
196 let digit = (value % 44) as usize;
197 result.push(BASE44_ALPHABET[digit]);
198 value /= 44;
199 }
200
201 result.reverse();
202 String::from_utf8(result).unwrap()
203}
204
205fn encode_bits_bigint(bits: usize, bytes: &[u8]) -> String {
207 let expected_bytes = bits.div_ceil(8);
208
209 let mut value = BigUint::zero();
211 for (i, &b) in bytes.iter().take(expected_bytes).enumerate() {
212 value += BigUint::from(b) << (i * 8);
213 }
214
215 let chars_needed = ((bits as f64) * 2f64.ln() / 44f64.ln()).ceil() as usize;
217
218 let mut result = Vec::with_capacity(chars_needed);
220 let forty_four = BigUint::from(44u32);
221 let mut v = value;
222
223 for _ in 0..chars_needed {
224 let digit = (&v % &forty_four).to_u32_digits();
225 let d = if digit.is_empty() {
226 0
227 } else {
228 digit[0] as usize
229 };
230 result.push(BASE44_ALPHABET[d]);
231 v /= &forty_four;
232 }
233
234 result.reverse();
236 String::from_utf8(result).unwrap()
237}
238
239pub fn decode_bits(bits: usize, s: &str) -> Result<Vec<u8>, Base44Error> {
270 assert!(bits > 0, "bits must be > 0");
271
272 if bits <= 64 {
274 decode_bits_u64(bits, s)
275 } else if bits <= 128 {
276 decode_bits_u128(bits, s)
277 } else {
278 decode_bits_bigint(bits, s)
279 }
280}
281
282#[inline]
284fn decode_bits_u64(bits: usize, s: &str) -> Result<Vec<u8>, Base44Error> {
285 let mut value = 0u64;
286
287 for ch in s.chars() {
288 let digit = b44_val(ch as u8).ok_or(Base44Error::InvalidChar)?;
289 value = value
290 .checked_mul(44)
291 .and_then(|v| v.checked_add(digit as u64))
292 .ok_or(Base44Error::Overflow)?;
293 }
294
295 if bits < 64 {
297 let max_value = (1u64 << bits) - 1;
298 if value > max_value {
299 return Err(Base44Error::Overflow);
300 }
301 }
302
303 let byte_count = bits.div_ceil(8);
305 let mut bytes = vec![0u8; byte_count];
306 for (i, byte) in bytes.iter_mut().enumerate().take(byte_count) {
307 *byte = (value >> (i * 8)) as u8;
308 }
309
310 Ok(bytes)
311}
312
313#[inline]
315fn decode_bits_u128(bits: usize, s: &str) -> Result<Vec<u8>, Base44Error> {
316 let mut value = 0u128;
317
318 for ch in s.chars() {
319 let digit = b44_val(ch as u8).ok_or(Base44Error::InvalidChar)?;
320 value = value
321 .checked_mul(44)
322 .and_then(|v| v.checked_add(digit as u128))
323 .ok_or(Base44Error::Overflow)?;
324 }
325
326 if bits < 128 {
328 let max_value = (1u128 << bits) - 1;
329 if value > max_value {
330 return Err(Base44Error::Overflow);
331 }
332 }
333
334 let byte_count = bits.div_ceil(8);
336 let mut bytes = vec![0u8; byte_count];
337 for (i, byte) in bytes.iter_mut().enumerate().take(byte_count) {
338 *byte = (value >> (i * 8)) as u8;
339 }
340
341 Ok(bytes)
342}
343
344fn decode_bits_bigint(bits: usize, s: &str) -> Result<Vec<u8>, Base44Error> {
346 let mut value = BigUint::zero();
348 let forty_four = BigUint::from(44u32);
349
350 for ch in s.chars() {
351 let digit = b44_val(ch as u8).ok_or(Base44Error::InvalidChar)?;
352 value = value * &forty_four + BigUint::from(digit as u32);
353 }
354
355 let max_value = if bits < usize::MAX {
357 (BigUint::one() << bits) - BigUint::one()
358 } else {
359 value.clone()
361 };
362
363 if value > max_value {
364 return Err(Base44Error::Overflow);
365 }
366
367 let byte_count = bits.div_ceil(8);
369 let mut bytes = vec![0u8; byte_count];
370 let value_bytes = value.to_bytes_le();
371
372 for (i, &b) in value_bytes.iter().enumerate() {
373 if i < byte_count {
374 bytes[i] = b;
375 }
376 }
377
378 Ok(bytes)
379}
380
// Unit tests for the byte-pair codec (`encode` / `decode`) and the bit-width
// codec (`encode_bits` / `decode_bits`).
#[cfg(test)]
mod tests {
    use super::*;

    // Every input must come back unchanged from `encode` followed by `decode`.
    #[test]
    fn roundtrips() {
        let cases: &[&[u8]] = &[
            b"",
            b"A",
            b"AB",
            b"Hello, world!",
            &[0x00],
            &[0x00, 0x01, 0xFF, 0x80, 0x7F],
        ];
        for &case in cases {
            let s = encode(case);
            let dec = decode(&s).unwrap();
            assert_eq!(case, dec.as_slice());
        }
    }

    // Fixed input/output pairs pinning the digit order of the byte-pair codec.
    #[test]
    fn known_vectors() {
        assert_eq!(encode(&[0x00, 0x00]), "000");

        // 0x41 = 65 = 1 * 44 + 21; least-significant digit first: 'L' then '1'.
        assert_eq!(encode(&[0x41]), "L1");

        // The pair is the big-endian value 1, so only the first digit is non-zero.
        assert_eq!(encode(&[0x00, 0x01]), "100");

        assert_eq!(decode("000").unwrap(), &[0x00, 0x00]);
        assert_eq!(decode("L1").unwrap(), &[0x41]);
        assert_eq!(decode("100").unwrap(), &[0x00, 0x01]);
    }

    // Each `decode` failure mode maps to the expected error variant.
    #[test]
    fn errors() {
        // Whitespace is not part of the alphabet.
        assert!(matches!(decode("\t"), Err(Base44Error::InvalidChar)));
        assert!(matches!(decode("\n"), Err(Base44Error::InvalidChar)));
        assert!(matches!(decode(" "), Err(Base44Error::InvalidChar)));
        // The largest three-digit group exceeds 65535.
        assert!(matches!(decode(":::"), Err(Base44Error::Overflow)));
        // 42 * 44 + 42 exceeds 255.
        assert!(matches!(decode("//"), Err(Base44Error::Overflow)));
        // A single valid character cannot encode a byte.
        assert!(matches!(decode("A"), Err(Base44Error::Dangling)));
        // The UTF-8 bytes of a non-ASCII character are not alphabet symbols.
        assert!(matches!(decode("😀"), Err(Base44Error::InvalidChar)));
    }

    // Extremes of the byte-pair codec: maximum bytes, every symbol, empty input.
    #[test]
    fn boundary_cases() {
        assert_eq!(encode(&[0xFF]), "Z5");
        assert_eq!(decode("Z5").unwrap(), &[0xFF]);

        assert_eq!(encode(&[0xFF, 0xFF]), "J%X");
        assert_eq!(decode("J%X").unwrap(), &[0xFF, 0xFF]);

        let alphabet = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ$%*+-./:";
        for (idx, ch) in alphabet.chars().enumerate() {
            // Digits up to 33 fit as the most-significant digit of a
            // three-character group (34 * 44 * 44 already exceeds 65535);
            // larger digits are exercised in a two-character group instead.
            if idx < 34 {
                let s = format!("00{ch}");
                decode(&s)
                    .unwrap_or_else(|_| panic!("Character {ch} should be valid in 3-char group"));
            } else {
                let s = format!("{ch}0");
                decode(&s).unwrap_or_else(|_| panic!("Character {ch} should be valid"));
            }
        }

        assert_eq!(encode(&[]), "");
        assert_eq!(decode("").unwrap(), Vec::<u8>::new());

        // Odd length: one pair plus a trailing single byte.
        let data = &[0x01, 0x02, 0x03];
        let encoded = encode(data);
        assert_eq!(decode(&encoded).unwrap(), data);
    }

    // Encoded output contains only alphabet symbols, and in particular no spaces.
    #[test]
    fn url_safe_characters() {
        let test_data = &[
            &[0x00][..],
            &[0xFF],
            &[0x00, 0xFF],
            &[0x12, 0x34, 0x56, 0x78, 0x9A, 0xBC, 0xDE, 0xF0],
        ];

        for data in test_data {
            let encoded = encode(data);
            assert!(!encoded.contains(' '), "Encoded should not contain space");
            for ch in encoded.chars() {
                assert!(
                    BASE44_ALPHABET.contains(&(ch as u8)),
                    "Character {ch} not in alphabet"
                );
            }
        }
    }

    // The largest 103-bit value encodes to 19 symbols and round-trips.
    #[test]
    fn optimal_bit_encoding_103() {
        let mut data = [0xFFu8; 13];
        // Clear the top bit of the 13th byte so the value fits in 103 bits.
        data[12] = 0x7F;
        let encoded = encode_bits(103, &data);
        assert_eq!(encoded.len(), 19, "103 bits should encode to 19 chars");

        let decoded = decode_bits(103, &encoded).unwrap();
        assert_eq!(decoded, data.to_vec(), "Roundtrip should preserve data");
    }

    // Round-trips several widths through `encode_bits` / `decode_bits`.
    #[test]
    fn optimal_bit_encoding_roundtrip() {
        let test_cases = vec![
            (8, vec![0x42]),
            (16, vec![0x12, 0x34]),
            (24, vec![0xAB, 0xCD, 0xEF]),
            (
                103,
                vec![
                    0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D,
                ],
            ),
            (128, vec![0xFF; 16]),
        ];

        for (bits, data) in test_cases {
            let encoded = encode_bits(bits, &data);
            let decoded = decode_bits(bits, &encoded).unwrap();

            let byte_count = bits.div_ceil(8);
            assert_eq!(decoded.len(), byte_count);

            for i in 0..byte_count {
                // In a partially used last byte only the low `bits % 8` bits
                // are compared.
                if i == byte_count - 1 && bits % 8 != 0 {
                    let mask = (1u8 << (bits % 8)) - 1;
                    assert_eq!(decoded[i] & mask, data[i] & mask);
                } else {
                    assert_eq!(decoded[i], data[i]);
                }
            }
        }
    }

    // For 13 bytes the bit-width codec needs one symbol fewer than the
    // byte-pair codec.
    #[test]
    fn optimal_vs_byte_pair_comparison() {
        let data = [
            0x12, 0x34, 0x56, 0x78, 0x9A, 0xBC, 0xDE, 0xF0, 0x11, 0x22, 0x33, 0x44, 0x55,
        ];

        let optimal = encode_bits(103, &data);
        let byte_pair = encode(&data);

        assert_eq!(optimal.len(), 19);
        // Six pairs at three symbols each plus two symbols for the last byte.
        assert_eq!(byte_pair.len(), 20);

        println!(
            "103 bits: optimal={} chars, byte-pair={} chars, savings={}%",
            optimal.len(),
            byte_pair.len(),
            (byte_pair.len() - optimal.len()) * 100 / byte_pair.len()
        );
    }

    // All-zero input, a single bit, and the maximum 103-bit value.
    #[test]
    fn optimal_bit_encoding_edge_cases() {
        // Zero still produces the full fixed-width output.
        let zeros = vec![0u8; 13];
        let encoded_zeros = encode_bits(103, &zeros);
        assert_eq!(encoded_zeros.len(), 19);
        let decoded_zeros = decode_bits(103, &encoded_zeros).unwrap();
        assert_eq!(decoded_zeros, zeros);

        let one_bit = vec![0x01];
        let encoded_one = encode_bits(1, &one_bit);
        let decoded_one = decode_bits(1, &encoded_one).unwrap();
        assert_eq!(decoded_one[0] & 0x01, 1);

        let mut max_103 = vec![0xFFu8; 13];
        max_103[12] = 0x7F;
        let encoded_max = encode_bits(103, &max_103);
        let decoded_max = decode_bits(103, &encoded_max).unwrap();
        assert_eq!(decoded_max, max_103);
    }

    // Widths above 128 bits take the big-integer path.
    #[test]
    fn large_bit_counts() {
        let data_256 = vec![0x42u8; 32];
        let encoded_256 = encode_bits(256, &data_256);
        assert_eq!(encoded_256.len(), 47);
        let decoded_256 = decode_bits(256, &encoded_256).unwrap();
        assert_eq!(decoded_256, data_256);

        let data_512 = vec![0xABu8; 64];
        let encoded_512 = encode_bits(512, &data_512);
        assert_eq!(encoded_512.len(), 94);
        let decoded_512 = decode_bits(512, &encoded_512).unwrap();
        assert_eq!(decoded_512, data_512);

        let data_1024 = vec![0x11u8; 128];
        let encoded_1024 = encode_bits(1024, &data_1024);
        let decoded_1024 = decode_bits(1024, &encoded_1024).unwrap();
        assert_eq!(decoded_1024, data_1024);
    }

    // 64 and 63 bits both stay on the u64 path.
    #[test]
    fn optimized_paths_u64_boundary() {
        let data_64 = vec![0xFFu8; 8];
        let encoded = encode_bits(64, &data_64);
        let decoded = decode_bits(64, &encoded).unwrap();
        assert_eq!(decoded, data_64);

        let mut data_63 = vec![0xFFu8; 8];
        // Clear the top bit so the value fits in 63 bits.
        data_63[7] = 0x7F;
        let encoded = encode_bits(63, &data_63);
        let decoded = decode_bits(63, &encoded).unwrap();
        assert_eq!(decoded[7] & 0x7F, data_63[7]);
    }

    // 128 bits is the last width on the u128 path; 129 bits is the first on
    // the big-integer path.
    #[test]
    fn optimized_paths_u128_boundary() {
        let data_128 = vec![0xFFu8; 16];
        let encoded = encode_bits(128, &data_128);
        let decoded = decode_bits(128, &encoded).unwrap();
        assert_eq!(decoded, data_128);

        let mut data_129 = vec![0xFFu8; 17];
        // Only the lowest bit of the 17th byte belongs to a 129-bit value.
        data_129[16] = 0x01;
        let encoded = encode_bits(129, &data_129);
        let decoded = decode_bits(129, &encoded).unwrap();
        assert_eq!(decoded[16] & 0x01, data_129[16]);
    }

    // Sub-byte widths plus two whole-byte widths on the u64 path.
    #[test]
    fn optimized_paths_small_bits() {
        for bits in 1..=7 {
            // The input is pre-masked so it never carries bits above `bits`.
            let mask = (1u8 << bits) - 1;
            let data = vec![0x7Fu8 & mask];
            let encoded = encode_bits(bits, &data);
            let decoded = decode_bits(bits, &encoded).unwrap();
            assert_eq!(decoded[0] & mask, data[0] & mask, "Failed for {bits} bits");
        }

        let data_32 = vec![0x12, 0x34, 0x56, 0x78];
        let encoded_32 = encode_bits(32, &data_32);
        let decoded_32 = decode_bits(32, &encoded_32).unwrap();
        assert_eq!(decoded_32, data_32);

        let data_48 = vec![0xFF; 6];
        let encoded_48 = encode_bits(48, &data_48);
        let decoded_48 = decode_bits(48, &encoded_48).unwrap();
        assert_eq!(decoded_48, data_48);
    }

    // Error reporting of `decode_bits`: invalid characters and out-of-range values.
    #[test]
    fn encode_bits_error_cases() {
        assert!(matches!(
            decode_bits(8, "ABC😀"),
            Err(Base44Error::InvalidChar)
        ));

        assert!(matches!(
            decode_bits(8, "ABC "),
            Err(Base44Error::InvalidChar)
        ));

        // "ZZZ" is far larger than 255, the maximum for 8 bits.
        let large_value = "ZZZ";
        let result = decode_bits(8, large_value);
        assert!(matches!(result, Err(Base44Error::Overflow)));

        // 65 bits selects the u128 path; fourteen 'Z' digits exceed 2^65.
        let large_value_128 = "ZZZZZZZZZZZZZZ";
        let result = decode_bits(65, large_value_128);
        assert!(matches!(result, Err(Base44Error::Overflow)));
    }

    // Round-trips a few distinct bit patterns at 32, 128 and 64 bits.
    #[test]
    fn encode_bits_various_patterns() {
        let alternating = vec![0xAA, 0x55, 0xAA, 0x55];
        let encoded = encode_bits(32, &alternating);
        let decoded = decode_bits(32, &encoded).unwrap();
        assert_eq!(decoded, alternating);

        let sequential: Vec<u8> = (0..16).collect();
        let encoded = encode_bits(128, &sequential);
        let decoded = decode_bits(128, &encoded).unwrap();
        assert_eq!(decoded, sequential);

        let random = vec![0x9E, 0x3D, 0x7B, 0x2F, 0xC8, 0x15, 0x64, 0xAA];
        let encoded = encode_bits(64, &random);
        let decoded = decode_bits(64, &encoded).unwrap();
        assert_eq!(decoded, random);
    }

    // Both codecs must recover the same bytes from their own encodings.
    #[test]
    fn cross_verify_encode_methods() {
        let data_8 = vec![0x42];
        let encoded_bits = encode_bits(8, &data_8);
        let decoded_bits = decode_bits(8, &encoded_bits).unwrap();
        assert_eq!(decoded_bits, data_8);

        let data_16 = vec![0x12, 0x34];
        let encoded_bits = encode_bits(16, &data_16);
        let decoded_bits = decode_bits(16, &encoded_bits).unwrap();
        assert_eq!(decoded_bits, data_16);

        let data = vec![0xAB, 0xCD];
        let encoded_pair = encode(&data);
        let encoded_bits = encode_bits(16, &data);

        assert_eq!(decode(&encoded_pair).unwrap(), data);
        assert_eq!(decode_bits(16, &encoded_bits).unwrap(), data);
    }

    // `BASE44_ALPHABET` and `b44_val` must describe the same mapping.
    #[test]
    fn alphabet_completeness() {
        let mut chars: Vec<u8> = BASE44_ALPHABET.to_vec();
        chars.sort();
        chars.dedup();
        assert_eq!(
            chars.len(),
            44,
            "Alphabet should have exactly 44 unique characters"
        );

        // The index of each symbol in the alphabet is its digit value.
        for (expected_val, &ch) in BASE44_ALPHABET.iter().enumerate() {
            let val = b44_val(ch);
            let ch_char = ch as char;
            assert_eq!(
                val,
                Some(expected_val as u16),
                "Character {ch_char} should map to value {expected_val}"
            );
        }

        // Whitespace and the characters adjacent to the `A`-`Z` and `a`-`z`
        // ranges are not symbols.
        for ch in [b' ', b'\t', b'\n', b'@', b'[', b'`', b'{'] {
            let ch_char = ch as char;
            assert_eq!(b44_val(ch), None, "Character {ch_char} should not be valid");
        }
    }
}