1use std::collections::HashMap;
2use crate::config::EncodingMode;
3
4#[derive(Debug, Clone)]
5pub struct Alphabet {
6 chars: Vec<char>,
7 char_to_index: HashMap<char, usize>,
8 mode: EncodingMode,
9 padding: Option<char>,
10 start_codepoint: Option<u32>,
11}
12
13impl Alphabet {
14 pub fn new(chars: Vec<char>) -> Result<Self, String> {
15 Self::new_with_mode(chars, EncodingMode::BaseConversion, None)
16 }
17
18 pub fn new_with_mode(chars: Vec<char>, mode: EncodingMode, padding: Option<char>) -> Result<Self, String> {
19 Self::new_with_mode_and_range(chars, mode, padding, None)
20 }
21
22 pub fn new_with_mode_and_range(chars: Vec<char>, mode: EncodingMode, padding: Option<char>, start_codepoint: Option<u32>) -> Result<Self, String> {
23 if mode == EncodingMode::ByteRange {
25 if let Some(start) = start_codepoint {
26 if let Some(end_codepoint) = start.checked_add(255) {
28 if std::char::from_u32(end_codepoint).is_none() {
29 return Err(format!("Invalid Unicode range: {}-{}", start, end_codepoint));
30 }
31 } else {
32 return Err("Start codepoint too high for 256-byte range".to_string());
33 }
34
35 return Ok(Alphabet {
36 chars: Vec::new(),
37 char_to_index: HashMap::new(),
38 mode,
39 padding,
40 start_codepoint: Some(start),
41 });
42 } else {
43 return Err("ByteRange mode requires start_codepoint".to_string());
44 }
45 }
46
47 if chars.is_empty() {
48 return Err("Alphabet cannot be empty".to_string());
49 }
50
51 if mode == EncodingMode::Chunked {
53 let base = chars.len();
54 if !base.is_power_of_two() {
55 return Err(format!("Chunked mode requires power-of-two alphabet size, got {}", base));
56 }
57 }
58
59 let mut char_to_index = HashMap::new();
60 for (i, &c) in chars.iter().enumerate() {
61 if char_to_index.insert(c, i).is_some() {
62 return Err(format!("Duplicate character in alphabet: {}", c));
63 }
64 }
65
66 Ok(Alphabet {
67 chars,
68 char_to_index,
69 mode,
70 padding,
71 start_codepoint: None,
72 })
73 }
74
75 pub fn from_str(s: &str) -> Result<Self, String> {
76 let chars: Vec<char> = s.chars().collect();
77 Self::new(chars)
78 }
79
80 pub fn base(&self) -> usize {
81 match self.mode {
82 EncodingMode::ByteRange => 256,
83 _ => self.chars.len(),
84 }
85 }
86
87 pub fn mode(&self) -> &EncodingMode {
88 &self.mode
89 }
90
91 pub fn padding(&self) -> Option<char> {
92 self.padding
93 }
94
95 pub fn start_codepoint(&self) -> Option<u32> {
96 self.start_codepoint
97 }
98
99 pub fn encode_digit(&self, digit: usize) -> Option<char> {
100 match self.mode {
101 EncodingMode::ByteRange => {
102 if let Some(start) = self.start_codepoint {
103 if digit < 256 {
104 return std::char::from_u32(start + digit as u32);
105 }
106 }
107 None
108 }
109 _ => self.chars.get(digit).copied(),
110 }
111 }
112
113 pub fn decode_char(&self, c: char) -> Option<usize> {
114 match self.mode {
115 EncodingMode::ByteRange => {
116 if let Some(start) = self.start_codepoint {
117 let codepoint = c as u32;
118 if codepoint >= start && codepoint < start + 256 {
119 return Some((codepoint - start) as usize);
120 }
121 }
122 None
123 }
124 _ => self.char_to_index.get(&c).copied(),
125 }
126 }
127}
128
129