base_d/
alphabet.rs

1use std::collections::HashMap;
2use crate::config::EncodingMode;
3
4#[derive(Debug, Clone)]
5pub struct Alphabet {
6    chars: Vec<char>,
7    char_to_index: HashMap<char, usize>,
8    mode: EncodingMode,
9    padding: Option<char>,
10    start_codepoint: Option<u32>,
11}
12
13impl Alphabet {
14    pub fn new(chars: Vec<char>) -> Result<Self, String> {
15        Self::new_with_mode(chars, EncodingMode::BaseConversion, None)
16    }
17    
18    pub fn new_with_mode(chars: Vec<char>, mode: EncodingMode, padding: Option<char>) -> Result<Self, String> {
19        Self::new_with_mode_and_range(chars, mode, padding, None)
20    }
21    
22    pub fn new_with_mode_and_range(chars: Vec<char>, mode: EncodingMode, padding: Option<char>, start_codepoint: Option<u32>) -> Result<Self, String> {
23        // ByteRange mode doesn't need chars, just validates start_codepoint
24        if mode == EncodingMode::ByteRange {
25            if let Some(start) = start_codepoint {
26                // Validate that we can represent all 256 bytes
27                if let Some(end_codepoint) = start.checked_add(255) {
28                    if std::char::from_u32(end_codepoint).is_none() {
29                        return Err(format!("Invalid Unicode range: {}-{}", start, end_codepoint));
30                    }
31                } else {
32                    return Err("Start codepoint too high for 256-byte range".to_string());
33                }
34                
35                return Ok(Alphabet {
36                    chars: Vec::new(),
37                    char_to_index: HashMap::new(),
38                    mode,
39                    padding,
40                    start_codepoint: Some(start),
41                });
42            } else {
43                return Err("ByteRange mode requires start_codepoint".to_string());
44            }
45        }
46        
47        if chars.is_empty() {
48            return Err("Alphabet cannot be empty".to_string());
49        }
50        
51        // Validate alphabet size for chunked mode
52        if mode == EncodingMode::Chunked {
53            let base = chars.len();
54            if !base.is_power_of_two() {
55                return Err(format!("Chunked mode requires power-of-two alphabet size, got {}", base));
56            }
57        }
58        
59        let mut char_to_index = HashMap::new();
60        for (i, &c) in chars.iter().enumerate() {
61            if char_to_index.insert(c, i).is_some() {
62                return Err(format!("Duplicate character in alphabet: {}", c));
63            }
64        }
65        
66        Ok(Alphabet {
67            chars,
68            char_to_index,
69            mode,
70            padding,
71            start_codepoint: None,
72        })
73    }
74    
75    pub fn from_str(s: &str) -> Result<Self, String> {
76        let chars: Vec<char> = s.chars().collect();
77        Self::new(chars)
78    }
79    
80    pub fn base(&self) -> usize {
81        match self.mode {
82            EncodingMode::ByteRange => 256,
83            _ => self.chars.len(),
84        }
85    }
86    
87    pub fn mode(&self) -> &EncodingMode {
88        &self.mode
89    }
90    
91    pub fn padding(&self) -> Option<char> {
92        self.padding
93    }
94    
95    pub fn start_codepoint(&self) -> Option<u32> {
96        self.start_codepoint
97    }
98    
99    pub fn encode_digit(&self, digit: usize) -> Option<char> {
100        match self.mode {
101            EncodingMode::ByteRange => {
102                if let Some(start) = self.start_codepoint {
103                    if digit < 256 {
104                        return std::char::from_u32(start + digit as u32);
105                    }
106                }
107                None
108            }
109            _ => self.chars.get(digit).copied(),
110        }
111    }
112    
113    pub fn decode_char(&self, c: char) -> Option<usize> {
114        match self.mode {
115            EncodingMode::ByteRange => {
116                if let Some(start) = self.start_codepoint {
117                    let codepoint = c as u32;
118                    if codepoint >= start && codepoint < start + 256 {
119                        return Some((codepoint - start) as usize);
120                    }
121                }
122                None
123            }
124            _ => self.char_to_index.get(&c).copied(),
125        }
126    }
127}
128
129