base_d/
alphabet.rs

1use std::collections::HashMap;
2use crate::config::EncodingMode;
3
4/// Represents an encoding alphabet with its characters and configuration.
5///
6/// An alphabet defines the character set and encoding mode used for converting
7/// binary data to text. Supports three modes: mathematical base conversion,
8/// chunked (RFC 4648), and byte-range mapping.
9#[derive(Debug, Clone)]
10pub struct Alphabet {
11    chars: Vec<char>,
12    char_to_index: HashMap<char, usize>,
13    mode: EncodingMode,
14    padding: Option<char>,
15    start_codepoint: Option<u32>,
16}
17
18impl Alphabet {
19    /// Creates a new alphabet with default settings (BaseConversion mode, no padding).
20    ///
21    /// # Arguments
22    ///
23    /// * `chars` - Vector of characters to use in the alphabet
24    ///
25    /// # Errors
26    ///
27    /// Returns an error if the alphabet is empty or contains duplicate characters.
28    pub fn new(chars: Vec<char>) -> Result<Self, String> {
29        Self::new_with_mode(chars, EncodingMode::BaseConversion, None)
30    }
31    
32    /// Creates a new alphabet with specified encoding mode and optional padding.
33    ///
34    /// # Arguments
35    ///
36    /// * `chars` - Vector of characters to use in the alphabet
37    /// * `mode` - Encoding mode (BaseConversion, Chunked, or ByteRange)
38    /// * `padding` - Optional padding character (typically '=' for RFC modes)
39    ///
40    /// # Errors
41    ///
42    /// Returns an error if:
43    /// - The alphabet is empty or contains duplicates
44    /// - Chunked mode is used with a non-power-of-two alphabet size
45    pub fn new_with_mode(chars: Vec<char>, mode: EncodingMode, padding: Option<char>) -> Result<Self, String> {
46        Self::new_with_mode_and_range(chars, mode, padding, None)
47    }
48    
49    /// Creates a new alphabet with full configuration including byte-range support.
50    ///
51    /// # Arguments
52    ///
53    /// * `chars` - Vector of characters (empty for ByteRange mode)
54    /// * `mode` - Encoding mode
55    /// * `padding` - Optional padding character
56    /// * `start_codepoint` - Starting Unicode codepoint for ByteRange mode
57    ///
58    /// # Errors
59    ///
60    /// Returns an error if configuration is invalid for the specified mode.
61    pub fn new_with_mode_and_range(chars: Vec<char>, mode: EncodingMode, padding: Option<char>, start_codepoint: Option<u32>) -> Result<Self, String> {
62        // ByteRange mode doesn't need chars, just validates start_codepoint
63        if mode == EncodingMode::ByteRange {
64            if let Some(start) = start_codepoint {
65                // Validate that we can represent all 256 bytes
66                if let Some(end_codepoint) = start.checked_add(255) {
67                    if std::char::from_u32(end_codepoint).is_none() {
68                        return Err(format!("Invalid Unicode range: {}-{}", start, end_codepoint));
69                    }
70                } else {
71                    return Err("Start codepoint too high for 256-byte range".to_string());
72                }
73                
74                return Ok(Alphabet {
75                    chars: Vec::new(),
76                    char_to_index: HashMap::new(),
77                    mode,
78                    padding,
79                    start_codepoint: Some(start),
80                });
81            } else {
82                return Err("ByteRange mode requires start_codepoint".to_string());
83            }
84        }
85        
86        if chars.is_empty() {
87            return Err("Alphabet cannot be empty".to_string());
88        }
89        
90        // Validate alphabet size for chunked mode
91        if mode == EncodingMode::Chunked {
92            let base = chars.len();
93            if !base.is_power_of_two() {
94                return Err(format!("Chunked mode requires power-of-two alphabet size, got {}", base));
95            }
96        }
97        
98        let mut char_to_index = HashMap::new();
99        for (i, &c) in chars.iter().enumerate() {
100            if char_to_index.insert(c, i).is_some() {
101                return Err(format!("Duplicate character in alphabet: {}", c));
102            }
103        }
104        
105        Ok(Alphabet {
106            chars,
107            char_to_index,
108            mode,
109            padding,
110            start_codepoint: None,
111        })
112    }
113    
114    /// Creates an alphabet from a string of characters.
115    ///
116    /// # Arguments
117    ///
118    /// * `s` - String containing the alphabet characters
119    pub fn from_str(s: &str) -> Result<Self, String> {
120        let chars: Vec<char> = s.chars().collect();
121        Self::new(chars)
122    }
123    
124    /// Returns the base (radix) of the alphabet.
125    ///
126    /// For ByteRange mode, always returns 256. Otherwise returns the number of characters.
127    pub fn base(&self) -> usize {
128        match self.mode {
129            EncodingMode::ByteRange => 256,
130            _ => self.chars.len(),
131        }
132    }
133    
134    /// Returns the encoding mode of this alphabet.
135    pub fn mode(&self) -> &EncodingMode {
136        &self.mode
137    }
138    
139    /// Returns the padding character, if any.
140    pub fn padding(&self) -> Option<char> {
141        self.padding
142    }
143    
144    /// Returns the starting Unicode codepoint for ByteRange mode.
145    pub fn start_codepoint(&self) -> Option<u32> {
146        self.start_codepoint
147    }
148    
149    /// Encodes a digit (0 to base-1) as a character.
150    ///
151    /// Returns `None` if the digit is out of range.
152    pub fn encode_digit(&self, digit: usize) -> Option<char> {
153        match self.mode {
154            EncodingMode::ByteRange => {
155                if let Some(start) = self.start_codepoint {
156                    if digit < 256 {
157                        return std::char::from_u32(start + digit as u32);
158                    }
159                }
160                None
161            }
162            _ => self.chars.get(digit).copied(),
163        }
164    }
165    
166    /// Decodes a character back to its digit value.
167    ///
168    /// Returns `None` if the character is not in the alphabet.
169    pub fn decode_char(&self, c: char) -> Option<usize> {
170        match self.mode {
171            EncodingMode::ByteRange => {
172                if let Some(start) = self.start_codepoint {
173                    let codepoint = c as u32;
174                    if codepoint >= start && codepoint < start + 256 {
175                        return Some((codepoint - start) as usize);
176                    }
177                }
178                None
179            }
180            _ => self.char_to_index.get(&c).copied(),
181        }
182    }
183}
184
185