base_d/core/
word_dictionary.rs

1use std::collections::HashMap;
2
/// A word-based dictionary for encoding binary data as word sequences.
///
/// Unlike character-based `Dictionary`, this uses whole words as encoding symbols:
/// every word occupies one index, so a list of `n` words behaves like a base-`n`
/// alphabet. Designed for BIP-39, Diceware, PGP word lists, and custom word-based
/// encodings.
///
/// Instances are created through [`WordDictionary::builder`]. Two dictionaries
/// compare equal when their word lists, reverse lookups, delimiters and
/// case-sensitivity settings are all equal.
///
/// # Examples
///
/// ```
/// use base_d::WordDictionary;
///
/// let dict = WordDictionary::builder()
///     .words(vec!["abandon", "ability", "able", "about"])
///     .delimiter(" ")
///     .build()
///     .unwrap();
///
/// assert_eq!(dict.base(), 4);
/// assert_eq!(dict.encode_word(0), Some("abandon"));
/// assert_eq!(dict.decode_word("ability"), Some(1));
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WordDictionary {
    /// Words in index order: the word at position `i` encodes the value `i`.
    words: Vec<String>,
    /// Reverse lookup from word to index. As populated by the builder, keys are
    /// lowercased when the dictionary is case-insensitive.
    word_to_index: HashMap<String, usize>,
    /// Separator placed between words in encoded output.
    delimiter: String,
    /// Whether `decode_word` matches words exactly (`true`) or ignores case (`false`).
    case_sensitive: bool,
}
30
31impl WordDictionary {
32    /// Creates a new WordDictionaryBuilder for constructing a WordDictionary.
33    pub fn builder() -> WordDictionaryBuilder {
34        WordDictionaryBuilder::new()
35    }
36
37    /// Returns the base (number of words) in this dictionary.
38    pub fn base(&self) -> usize {
39        self.words.len()
40    }
41
42    /// Returns the delimiter used between words in encoded output.
43    pub fn delimiter(&self) -> &str {
44        &self.delimiter
45    }
46
47    /// Returns whether this dictionary uses case-sensitive matching.
48    pub fn case_sensitive(&self) -> bool {
49        self.case_sensitive
50    }
51
52    /// Encodes a digit (0 to base-1) as a word.
53    ///
54    /// Returns `None` if the index is out of range.
55    pub fn encode_word(&self, index: usize) -> Option<&str> {
56        self.words.get(index).map(|s| s.as_str())
57    }
58
59    /// Decodes a word back to its index value.
60    ///
61    /// Returns `None` if the word is not in the dictionary.
62    /// Matching respects the `case_sensitive` setting.
63    pub fn decode_word(&self, word: &str) -> Option<usize> {
64        let key = if self.case_sensitive {
65            word.to_string()
66        } else {
67            word.to_lowercase()
68        };
69        self.word_to_index.get(&key).copied()
70    }
71
72    /// Returns an iterator over all words in the dictionary.
73    pub fn words(&self) -> impl Iterator<Item = &str> {
74        self.words.iter().map(|s| s.as_str())
75    }
76}
77
/// Builder for constructing a `WordDictionary` with flexible configuration.
///
/// Every setting is optional until `build` is called; a setting that was never
/// supplied is stored as `None`. The builder is `Clone`, so a partially
/// configured builder can be reused to produce several dictionaries.
///
/// # Examples
///
/// ```
/// use base_d::WordDictionary;
///
/// let dict = WordDictionary::builder()
///     .words(vec!["alpha", "bravo", "charlie", "delta"])
///     .delimiter("-")
///     .case_sensitive(false)
///     .build()
///     .unwrap();
/// ```
#[derive(Debug, Clone, Default)]
#[must_use = "a builder does nothing until `build` is called"]
pub struct WordDictionaryBuilder {
    /// Word list in index order; `None` until a word list has been supplied.
    words: Option<Vec<String>>,
    /// Separator between encoded words; `None` until explicitly set.
    delimiter: Option<String>,
    /// Case-sensitivity of word matching; `None` until explicitly set.
    case_sensitive: Option<bool>,
}
98
99impl WordDictionaryBuilder {
100    /// Creates a new WordDictionaryBuilder with default settings.
101    pub fn new() -> Self {
102        Self::default()
103    }
104
105    /// Sets the word list from a vector of strings.
106    pub fn words<I, S>(mut self, words: I) -> Self
107    where
108        I: IntoIterator<Item = S>,
109        S: Into<String>,
110    {
111        self.words = Some(words.into_iter().map(|s| s.into()).collect());
112        self
113    }
114
115    /// Sets the word list from a newline-separated string.
116    ///
117    /// Empty lines are ignored. Leading/trailing whitespace is trimmed.
118    pub fn words_from_str(mut self, s: &str) -> Self {
119        self.words = Some(
120            s.lines()
121                .map(|line| line.trim())
122                .filter(|line| !line.is_empty())
123                .map(|line| line.to_string())
124                .collect(),
125        );
126        self
127    }
128
129    /// Sets the delimiter used between words in encoded output.
130    ///
131    /// Default is a single space " ".
132    pub fn delimiter<S: Into<String>>(mut self, delimiter: S) -> Self {
133        self.delimiter = Some(delimiter.into());
134        self
135    }
136
137    /// Sets whether word matching is case-sensitive.
138    ///
139    /// Default is false (case-insensitive).
140    pub fn case_sensitive(mut self, case_sensitive: bool) -> Self {
141        self.case_sensitive = Some(case_sensitive);
142        self
143    }
144
145    /// Builds the WordDictionary with the configured settings.
146    ///
147    /// # Errors
148    ///
149    /// Returns an error if:
150    /// - No words were provided
151    /// - The word list is empty
152    /// - Duplicate words exist (considering case sensitivity)
153    pub fn build(self) -> Result<WordDictionary, String> {
154        let words = self.words.ok_or("No words provided")?;
155
156        if words.is_empty() {
157            return Err("Word list cannot be empty".to_string());
158        }
159
160        let case_sensitive = self.case_sensitive.unwrap_or(false);
161        let delimiter = self.delimiter.unwrap_or_else(|| " ".to_string());
162
163        // Build the reverse lookup, checking for duplicates
164        let mut word_to_index = HashMap::with_capacity(words.len());
165        for (i, word) in words.iter().enumerate() {
166            let key = if case_sensitive {
167                word.clone()
168            } else {
169                word.to_lowercase()
170            };
171
172            if word_to_index.insert(key.clone(), i).is_some() {
173                return Err(format!(
174                    "Duplicate word in dictionary: '{}' (normalized: '{}')",
175                    word, key
176                ));
177            }
178        }
179
180        Ok(WordDictionary {
181            words,
182            word_to_index,
183            delimiter,
184            case_sensitive,
185        })
186    }
187}
188
#[cfg(test)]
mod tests {
    use super::*;

    /// Attempts to build a dictionary from `words`, setting case sensitivity
    /// only when a value is given so the builder default stays in effect otherwise.
    fn try_build(words: &[&str], case_sensitive: Option<bool>) -> Result<WordDictionary, String> {
        let builder = WordDictionary::builder().words(words.iter().copied());
        let builder = match case_sensitive {
            Some(flag) => builder.case_sensitive(flag),
            None => builder,
        };
        builder.build()
    }

    /// Builds a dictionary from `words` with every other option left at its default.
    fn default_dict(words: &[&str]) -> WordDictionary {
        try_build(words, None).unwrap()
    }

    #[test]
    fn test_basic_word_dictionary() {
        let dict = default_dict(&["abandon", "ability", "able", "about"]);

        assert_eq!(dict.base(), 4);
        assert_eq!(dict.delimiter(), " ");
        assert!(!dict.case_sensitive());
    }

    #[test]
    fn test_encode_word() {
        let dict = default_dict(&["alpha", "bravo", "charlie"]);

        // One entry per index, including the first out-of-range index.
        let expected = [Some("alpha"), Some("bravo"), Some("charlie"), None];
        for (index, want) in expected.iter().enumerate() {
            assert_eq!(dict.encode_word(index), *want);
        }
    }

    #[test]
    fn test_decode_word_case_insensitive() {
        let dict = try_build(&["Alpha", "Bravo", "Charlie"], Some(false)).unwrap();

        for spelling in &["alpha", "ALPHA", "Alpha"] {
            assert_eq!(dict.decode_word(spelling), Some(0));
        }
        assert_eq!(dict.decode_word("delta"), None);
    }

    #[test]
    fn test_decode_word_case_sensitive() {
        let dict = try_build(&["Alpha", "Bravo", "Charlie"], Some(true)).unwrap();

        assert_eq!(dict.decode_word("Alpha"), Some(0));
        for spelling in &["alpha", "ALPHA"] {
            assert_eq!(dict.decode_word(spelling), None);
        }
    }

    #[test]
    fn test_custom_delimiter() {
        let dict = WordDictionary::builder()
            .words(vec!["one", "two", "three"])
            .delimiter("-")
            .build()
            .unwrap();

        assert_eq!(dict.delimiter(), "-");
    }

    #[test]
    fn test_words_from_str() {
        let dict = WordDictionary::builder()
            .words_from_str("abandon\nability\nable\nabout")
            .build()
            .unwrap();

        assert_eq!(dict.base(), 4);
        assert_eq!(dict.encode_word(0), Some("abandon"));
    }

    #[test]
    fn test_words_from_str_with_whitespace() {
        // Padding and blank lines must not produce extra or padded words.
        let dict = WordDictionary::builder()
            .words_from_str("  abandon  \n\n  ability  \n  able  \n\n")
            .build()
            .unwrap();

        assert_eq!(dict.base(), 3);
        assert_eq!(dict.encode_word(0), Some("abandon"));
    }

    #[test]
    fn test_empty_word_list_error() {
        let message = WordDictionary::builder()
            .words(Vec::<String>::new())
            .build()
            .expect_err("an empty word list must be rejected");
        assert!(message.contains("empty"));
    }

    #[test]
    fn test_no_words_error() {
        let message = WordDictionary::builder()
            .build()
            .expect_err("a builder without words must be rejected");
        assert!(message.contains("No words"));
    }

    #[test]
    fn test_duplicate_words_error() {
        let message = try_build(&["alpha", "bravo", "alpha"], None)
            .expect_err("repeated words must be rejected");
        assert!(message.contains("Duplicate"));
    }

    #[test]
    fn test_duplicate_words_case_insensitive() {
        let message = try_build(&["Alpha", "ALPHA"], Some(false))
            .expect_err("words differing only by case must collide");
        assert!(message.contains("Duplicate"));
    }

    #[test]
    fn test_duplicate_words_case_sensitive_allowed() {
        // With case sensitivity, "Alpha" and "ALPHA" are different
        let dict = try_build(&["Alpha", "ALPHA"], Some(true))
            .expect("case-sensitive dictionaries keep both spellings");
        assert_eq!(dict.base(), 2);
    }

    #[test]
    fn test_words_iterator() {
        let dict = default_dict(&["a", "b", "c"]);

        let collected = dict.words().collect::<Vec<&str>>();
        assert_eq!(collected, vec!["a", "b", "c"]);
    }
}