base_d/encoders/
encoding.rs

1use crate::core::dictionary::Dictionary;
2use num_integer::Integer;
3use num_traits::Zero;
4
/// Errors that can occur during decoding.
///
/// The type is `Clone` so callers can keep a copy of a failure (for example
/// to report it more than once) without re-running the decode.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DecodeError {
    /// The input contains a character not in the dictionary
    InvalidCharacter(char),
    /// The input string is empty
    EmptyInput,
    /// The padding is malformed or incorrect
    InvalidPadding,
}

impl std::fmt::Display for DecodeError {
    /// Writes the human-readable message for this error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // Only this variant carries data that has to be interpolated.
            Self::InvalidCharacter(c) => write!(f, "Invalid character in input: {}", c),
            // Constant messages are written directly.
            Self::EmptyInput => f.write_str("Cannot decode empty input"),
            Self::InvalidPadding => f.write_str("Invalid padding"),
        }
    }
}

// The default `Error` methods are sufficient: no variant wraps another error.
impl std::error::Error for DecodeError {}
27
28pub fn encode(data: &[u8], dictionary: &Dictionary) -> String {
29    if data.is_empty() {
30        return String::new();
31    }
32
33    // Count leading zeros for efficient handling
34    let leading_zeros = data.iter().take_while(|&&b| b == 0).count();
35
36    // If all zeros, return early
37    if leading_zeros == data.len() {
38        return dictionary
39            .encode_digit(0)
40            .unwrap()
41            .to_string()
42            .repeat(data.len());
43    }
44
45    let base = dictionary.base();
46    let mut num = num_bigint::BigUint::from_bytes_be(&data[leading_zeros..]);
47
48    // Pre-allocate result vector with estimated capacity
49    let max_digits =
50        ((data.len() - leading_zeros) * 8 * 1000) / (base as f64).log2() as usize / 1000 + 1;
51    let mut result = Vec::with_capacity(max_digits + leading_zeros);
52
53    let base_big = num_bigint::BigUint::from(base);
54
55    while !num.is_zero() {
56        let (quotient, remainder) = num.div_rem(&base_big);
57        let digit = remainder.to_u64_digits();
58        let digit_val = if digit.is_empty() {
59            0
60        } else {
61            digit[0] as usize
62        };
63        result.push(dictionary.encode_digit(digit_val).unwrap());
64        num = quotient;
65    }
66
67    // Add leading zeros
68    for _ in 0..leading_zeros {
69        result.push(dictionary.encode_digit(0).unwrap());
70    }
71
72    result.reverse();
73    result.into_iter().collect()
74}
75
76pub fn decode(encoded: &str, dictionary: &Dictionary) -> Result<Vec<u8>, DecodeError> {
77    if encoded.is_empty() {
78        return Err(DecodeError::EmptyInput);
79    }
80
81    let base = dictionary.base();
82    let mut num = num_bigint::BigUint::from(0u8);
83    let base_big = num_bigint::BigUint::from(base);
84
85    // Collect chars once for better cache performance
86    let chars: Vec<char> = encoded.chars().collect();
87    let mut leading_zeros = 0;
88
89    // Process in chunks for better performance
90    for &c in &chars {
91        let digit = dictionary
92            .decode_char(c)
93            .ok_or(DecodeError::InvalidCharacter(c))?;
94
95        if num.is_zero() && digit == 0 {
96            leading_zeros += 1;
97        } else {
98            num *= &base_big;
99            num += num_bigint::BigUint::from(digit);
100        }
101    }
102
103    // Handle all-zero case
104    if num.is_zero() && leading_zeros > 0 {
105        return Ok(vec![0u8; leading_zeros]);
106    }
107
108    let bytes = num.to_bytes_be();
109
110    // Construct result with pre-allocated capacity
111    let mut result = Vec::with_capacity(leading_zeros + bytes.len());
112    result.resize(leading_zeros, 0u8);
113    result.extend_from_slice(&bytes);
114
115    Ok(result)
116}