Skip to main content

base_d/core/
config.rs

1use serde::Deserialize;
2use std::collections::HashMap;
3
4// Include generated dictionary registry from build.rs
5include!(concat!(env!("OUT_DIR"), "/registry.rs"));
6
7/// Dictionary type: character-based or word-based.
8#[derive(Debug, Clone, Deserialize, PartialEq, Eq, Default)]
9#[serde(rename_all = "snake_case")]
10pub enum DictionaryType {
11    /// Character-based dictionary (traditional encoding)
12    #[default]
13    Char,
14    /// Word-based dictionary (BIP-39, Diceware, etc.)
15    Word,
16}
17
18/// Encoding strategy for converting binary data to text.
19///
20/// Different modes offer different tradeoffs between efficiency, compatibility,
21/// and features.
22#[derive(Debug, Clone, Deserialize, PartialEq, Eq)]
23#[serde(rename_all = "snake_case")]
24#[derive(Default)]
25pub enum EncodingMode {
26    /// True radix/base conversion treating data as a large number.
27    /// Works with any dictionary size. Output length varies with input.
28    /// Requires entire input before producing output (not streamable).
29    #[default]
30    #[serde(alias = "base_conversion")]
31    Radix,
32    /// Fixed-size bit chunking per RFC 4648.
33    /// Requires power-of-two dictionary size. Supports padding.
34    Chunked,
35    /// Direct 1:1 byte-to-character mapping using Unicode codepoint ranges.
36    /// Zero encoding overhead. Always 256 characters.
37    ByteRange,
38}
39
40/// Configuration for a single dictionary loaded from TOML.
41#[derive(Debug, Deserialize, Clone)]
42pub struct DictionaryConfig {
43    // === Type discriminant ===
44    /// Dictionary type: "char" (default) or "word"
45    #[serde(default, rename = "type")]
46    pub dictionary_type: DictionaryType,
47
48    // === Character-based fields ===
49    /// The characters comprising the dictionary (explicit list)
50    #[serde(default)]
51    pub chars: String,
52    /// Starting character for range-based dictionary definition
53    /// Use with `length` to define sequential Unicode ranges
54    #[serde(default)]
55    pub start: Option<String>,
56    /// Number of characters in range-based dictionary
57    /// Use with `start` to define sequential Unicode ranges
58    #[serde(default)]
59    pub length: Option<usize>,
60    /// Starting Unicode codepoint for ByteRange mode (256 chars)
61    #[serde(default)]
62    pub start_codepoint: Option<u32>,
63
64    // === Word-based fields ===
65    /// Inline word list for word-based dictionaries
66    #[serde(default)]
67    pub words: Option<Vec<String>>,
68    /// Path to external word list file (one word per line)
69    #[serde(default)]
70    pub words_file: Option<String>,
71    /// Delimiter between words in encoded output (default: " ")
72    #[serde(default)]
73    pub delimiter: Option<String>,
74    /// Whether word matching is case-sensitive (default: false)
75    #[serde(default)]
76    pub case_sensitive: Option<bool>,
77    /// Names of sub-dictionaries for alternating word encoding (e.g., ["pgp_even", "pgp_odd"])
78    #[serde(default)]
79    pub alternating: Option<Vec<String>>,
80
81    // === Common fields ===
82    /// The encoding mode to use (auto-detected if not specified)
83    #[serde(default)]
84    pub mode: Option<EncodingMode>,
85    /// Optional padding character (e.g., "=" for base64)
86    #[serde(default)]
87    pub padding: Option<String>,
88    /// Whether this dictionary renders consistently across platforms (default: true)
89    /// Dictionaries with common=false are excluded from random selection (--dejavu)
90    #[serde(default = "default_true")]
91    pub common: bool,
92}
93
94impl Default for DictionaryConfig {
95    fn default() -> Self {
96        Self {
97            dictionary_type: DictionaryType::default(),
98            chars: String::new(),
99            start: None,
100            length: None,
101            start_codepoint: None,
102            words: None,
103            words_file: None,
104            delimiter: None,
105            case_sensitive: None,
106            alternating: None,
107            mode: None,
108            padding: None,
109            common: true, // default to common for random selection
110        }
111    }
112}
113
114impl DictionaryConfig {
115    /// Returns the effective character set, generating from range if needed.
116    ///
117    /// Priority:
118    /// 1. If `chars` is non-empty, use it directly
119    /// 2. If `start` + `length` are set, generate sequential range
120    /// 3. Otherwise return empty string (ByteRange mode uses start_codepoint instead)
121    pub fn effective_chars(&self) -> Result<String, String> {
122        // Explicit chars take priority
123        if !self.chars.is_empty() {
124            return Ok(self.chars.clone());
125        }
126
127        // Generate from start + length range
128        if let (Some(start_str), Some(length)) = (&self.start, self.length) {
129            let start_char = start_str
130                .chars()
131                .next()
132                .ok_or("start must contain at least one character")?;
133            let start_codepoint = start_char as u32;
134
135            return Self::generate_range(start_codepoint, length);
136        }
137
138        // No chars defined - might be ByteRange mode
139        Ok(String::new())
140    }
141
142    /// Generate a string of sequential Unicode characters from a range.
143    fn generate_range(start: u32, length: usize) -> Result<String, String> {
144        const MAX_UNICODE: u32 = 0x10FFFF;
145        const SURROGATE_START: u32 = 0xD800;
146        const SURROGATE_END: u32 = 0xDFFF;
147
148        if length == 0 {
149            return Err("length must be greater than 0".to_string());
150        }
151
152        let end = start
153            .checked_add(length as u32 - 1)
154            .ok_or("range exceeds maximum Unicode codepoint")?;
155
156        if end > MAX_UNICODE {
157            return Err(format!(
158                "range end U+{:X} exceeds maximum Unicode codepoint U+{:X}",
159                end, MAX_UNICODE
160            ));
161        }
162
163        // Check for surrogate gap crossing
164        let crosses_surrogates = start <= SURROGATE_END && end >= SURROGATE_START;
165        if crosses_surrogates {
166            return Err(format!(
167                "range U+{:X}..U+{:X} crosses surrogate gap (U+D800..U+DFFF)",
168                start, end
169            ));
170        }
171
172        let mut result = String::with_capacity(length * 4); // UTF-8 worst case
173        for i in 0..length {
174            let codepoint = start + i as u32;
175            match char::from_u32(codepoint) {
176                Some(c) => result.push(c),
177                None => return Err(format!("invalid codepoint U+{:X}", codepoint)),
178            }
179        }
180
181        Ok(result)
182    }
183
184    /// Returns the effective encoding mode, auto-detecting if not explicitly set.
185    ///
186    /// Auto-detection rules:
187    /// - ByteRange: Must be explicitly set (requires start_codepoint)
188    /// - Chunked: If alphabet length is a power of 2
189    /// - Radix: Otherwise (true base conversion)
190    pub fn effective_mode(&self) -> EncodingMode {
191        if let Some(mode) = &self.mode {
192            return mode.clone();
193        }
194
195        // Auto-detect based on alphabet length
196        let len = if self.start_codepoint.is_some() {
197            // ByteRange must be explicit, but if someone sets start_codepoint
198            // without mode, assume they want ByteRange
199            return EncodingMode::ByteRange;
200        } else if let Some(length) = self.length {
201            // Range-based definition
202            length
203        } else {
204            self.chars.chars().count()
205        };
206
207        if len > 0 && len.is_power_of_two() {
208            EncodingMode::Chunked
209        } else {
210            EncodingMode::Radix
211        }
212    }
213}
214
/// Serde default provider for boolean fields that should be `true` when the
/// key is absent (used by `DictionaryConfig::common`).
fn default_true() -> bool {
    true
}
218
219/// Collection of dictionary configurations loaded from TOML files.
220#[derive(Debug, Deserialize)]
221pub struct DictionaryRegistry {
222    /// Map of dictionary names to their configurations
223    pub dictionaries: HashMap<String, DictionaryConfig>,
224    /// Compression algorithm configurations
225    #[serde(default)]
226    pub compression: HashMap<String, CompressionConfig>,
227    /// Global settings
228    #[serde(default)]
229    pub settings: Settings,
230}
231
232/// Configuration for a compression algorithm.
233#[derive(Debug, Deserialize, Clone)]
234pub struct CompressionConfig {
235    /// Default compression level
236    pub default_level: u32,
237}
238
239/// xxHash-specific settings.
240#[derive(Debug, Deserialize, Clone, Default)]
241pub struct XxHashSettings {
242    /// Default seed for xxHash algorithms
243    #[serde(default)]
244    pub default_seed: u64,
245    /// Path to default secret file for XXH3 variants
246    #[serde(default)]
247    pub default_secret_file: Option<String>,
248}
249
250/// Global settings for base-d.
251#[derive(Debug, Deserialize, Clone, Default)]
252pub struct Settings {
253    /// Default dictionary - if not set, requires explicit -e or --dejavu
254    #[serde(default)]
255    pub default_dictionary: Option<String>,
256    /// xxHash configuration
257    #[serde(default)]
258    pub xxhash: XxHashSettings,
259}
260
261impl DictionaryRegistry {
262    /// Parses dictionary configurations from TOML content.
263    pub fn from_toml(content: &str) -> Result<Self, toml::de::Error> {
264        toml::from_str(content)
265    }
266
267    /// Loads the built-in dictionary configurations.
268    ///
269    /// Returns the default dictionaries bundled with the library.
270    pub fn load_default() -> Result<Self, Box<dyn std::error::Error>> {
271        Ok(Self {
272            dictionaries: build_registry(),
273            compression: HashMap::new(),
274            settings: Settings::default(),
275        })
276    }
277
278    /// Loads configuration from a custom file path.
279    pub fn load_from_file(path: &std::path::Path) -> Result<Self, Box<dyn std::error::Error>> {
280        let content = std::fs::read_to_string(path)?;
281        Ok(Self::from_toml(&content)?)
282    }
283
284    /// Loads configuration with user overrides from standard locations.
285    ///
286    /// Searches in priority order:
287    /// 1. Built-in dictionaries (from library)
288    /// 2. `~/.config/base-d/dictionaries.toml` (user overrides)
289    /// 3. `./dictionaries.toml` (project-local overrides)
290    ///
291    /// Later configurations override earlier ones for matching dictionary names.
292    pub fn load_with_overrides() -> Result<Self, Box<dyn std::error::Error>> {
293        let mut config = Self::load_default()?;
294
295        // Try to load user config from ~/.config/base-d/dictionaries.toml
296        if let Some(config_dir) = dirs::config_dir() {
297            let user_config_path = config_dir.join("base-d").join("dictionaries.toml");
298            if user_config_path.exists() {
299                match Self::load_from_file(&user_config_path) {
300                    Ok(user_config) => {
301                        config.merge(user_config);
302                    }
303                    Err(e) => {
304                        eprintln!(
305                            "Warning: Failed to load user config from {:?}: {}",
306                            user_config_path, e
307                        );
308                    }
309                }
310            }
311        }
312
313        // Try to load local config from ./dictionaries.toml
314        let local_config_path = std::path::Path::new("dictionaries.toml");
315        if local_config_path.exists() {
316            match Self::load_from_file(local_config_path) {
317                Ok(local_config) => {
318                    config.merge(local_config);
319                }
320                Err(e) => {
321                    eprintln!(
322                        "Warning: Failed to load local config from {:?}: {}",
323                        local_config_path, e
324                    );
325                }
326            }
327        }
328
329        Ok(config)
330    }
331
332    /// Merges another configuration into this one.
333    ///
334    /// Dictionaries from `other` override dictionaries with the same name in `self`.
335    pub fn merge(&mut self, other: DictionaryRegistry) {
336        for (name, dictionary) in other.dictionaries {
337            self.dictionaries.insert(name, dictionary);
338        }
339    }
340
341    /// Retrieves an dictionary configuration by name.
342    pub fn get_dictionary(&self, name: &str) -> Option<&DictionaryConfig> {
343        self.dictionaries.get(name)
344    }
345
346    /// Builds a ready-to-use Dictionary from a named configuration.
347    ///
348    /// This is a convenience method that handles the common pattern of:
349    /// 1. Looking up the dictionary config
350    /// 2. Getting effective chars
351    /// 3. Building the Dictionary with proper mode/padding
352    ///
353    /// # Example
354    /// ```
355    /// # use base_d::DictionaryRegistry;
356    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
357    /// let registry = DictionaryRegistry::load_default()?;
358    /// let dict = registry.dictionary("base64")?;
359    /// let encoded = base_d::encode(b"Hello", &dict);
360    /// # Ok(())
361    /// # }
362    /// ```
363    pub fn dictionary(
364        &self,
365        name: &str,
366    ) -> Result<crate::Dictionary, crate::encoders::algorithms::errors::DictionaryNotFoundError>
367    {
368        let config = self.get_dictionary(name).ok_or_else(|| {
369            crate::encoders::algorithms::errors::DictionaryNotFoundError::new(name)
370        })?;
371
372        self.build_dictionary(config).map_err(|e| {
373            crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(name, e)
374        })
375    }
376
377    /// Returns a random dictionary suitable for encoding.
378    ///
379    /// Only selects from dictionaries marked as `common = true` (the default).
380    /// These are dictionaries that render consistently across platforms.
381    ///
382    /// # Example
383    /// ```
384    /// # use base_d::DictionaryRegistry;
385    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
386    /// let registry = DictionaryRegistry::load_default()?;
387    /// let (name, dict) = registry.random()?;
388    /// let encoded = base_d::encode(b"Hello", &dict);
389    /// # Ok(())
390    /// # }
391    /// ```
392    pub fn random(&self) -> Result<(String, crate::Dictionary), Box<dyn std::error::Error>> {
393        use crate::core::dictionary::is_safe_byte_range;
394        use rand::seq::IteratorRandom;
395
396        let common_names: Vec<&String> = self
397            .dictionaries
398            .iter()
399            .filter(|(_, config)| {
400                // Only include common, character-based dictionaries
401                if !config.common || config.dictionary_type != DictionaryType::Char {
402                    return false;
403                }
404
405                // For ByteRange dictionaries, verify the codepoint range is safe
406                // (no NUL, C1 controls, or surrogates in the mapped range)
407                if config.effective_mode() == EncodingMode::ByteRange {
408                    if let Some(start) = config.start_codepoint {
409                        return is_safe_byte_range(start);
410                    }
411                    return false; // ByteRange without start_codepoint is invalid
412                }
413
414                true
415            })
416            .map(|(name, _)| name)
417            .collect();
418
419        let name = common_names
420            .into_iter()
421            .choose(&mut rand::rng())
422            .ok_or("No common dictionaries available")?;
423
424        let dict = self.dictionary(name)?;
425        Ok((name.clone(), dict))
426    }
427
428    /// Returns a list of all dictionary names.
429    pub fn names(&self) -> Vec<&str> {
430        self.dictionaries.keys().map(|s| s.as_str()).collect()
431    }
432
433    /// Returns a list of common dictionary names (suitable for random selection).
434    ///
435    /// Applies the same safety filtering as `random()`: excludes word dictionaries
436    /// and ByteRange dictionaries with unsafe codepoint ranges.
437    pub fn common_names(&self) -> Vec<&str> {
438        use crate::core::dictionary::is_safe_byte_range;
439
440        self.dictionaries
441            .iter()
442            .filter(|(_, config)| {
443                if !config.common || config.dictionary_type != DictionaryType::Char {
444                    return false;
445                }
446
447                // For ByteRange dictionaries, verify the codepoint range is safe
448                if config.effective_mode() == EncodingMode::ByteRange {
449                    if let Some(start) = config.start_codepoint {
450                        return is_safe_byte_range(start);
451                    }
452                    return false;
453                }
454
455                true
456            })
457            .map(|(name, _)| name.as_str())
458            .collect()
459    }
460
461    /// Internal helper to build a Dictionary from a DictionaryConfig.
462    fn build_dictionary(&self, config: &DictionaryConfig) -> Result<crate::Dictionary, String> {
463        use crate::core::config::EncodingMode;
464
465        let mode = config.effective_mode();
466
467        // ByteRange mode uses start_codepoint, not chars
468        if mode == EncodingMode::ByteRange {
469            let start = config
470                .start_codepoint
471                .ok_or("ByteRange mode requires start_codepoint")?;
472            return crate::Dictionary::builder()
473                .mode(mode)
474                .start_codepoint(start)
475                .build();
476        }
477
478        // Get effective chars (handles both explicit and range-based)
479        let chars_str = config.effective_chars()?;
480        let chars: Vec<char> = chars_str.chars().collect();
481
482        // Build with optional padding
483        let mut builder = crate::Dictionary::builder().chars(chars).mode(mode);
484
485        if let Some(pad_str) = &config.padding
486            && let Some(pad_char) = pad_str.chars().next()
487        {
488            builder = builder.padding(pad_char);
489        }
490
491        builder.build()
492    }
493
494    /// Builds a WordDictionary from a named configuration.
495    ///
496    /// # Errors
497    ///
498    /// Returns error if:
499    /// - Dictionary not found
500    /// - Dictionary is not word-type
501    /// - Word list file cannot be read
502    /// - Word dictionary building fails
503    ///
504    /// # Example
505    /// ```
506    /// # use base_d::DictionaryRegistry;
507    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
508    /// let registry = DictionaryRegistry::load_default()?;
509    /// // Would work if bip39 is defined as a word dictionary
510    /// // let dict = registry.word_dictionary("bip39")?;
511    /// # Ok(())
512    /// # }
513    /// ```
514    pub fn word_dictionary(
515        &self,
516        name: &str,
517    ) -> Result<crate::WordDictionary, crate::encoders::algorithms::errors::DictionaryNotFoundError>
518    {
519        let config = self.get_dictionary(name).ok_or_else(|| {
520            crate::encoders::algorithms::errors::DictionaryNotFoundError::new(name)
521        })?;
522
523        // Verify it's a word dictionary
524        if config.dictionary_type != DictionaryType::Word {
525            return Err(
526                crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(
527                    name,
528                    format!(
529                        "Dictionary '{}' is not a word dictionary (type is {:?})",
530                        name, config.dictionary_type
531                    ),
532                ),
533            );
534        }
535
536        self.build_word_dictionary(config).map_err(|e| {
537            crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(name, e)
538        })
539    }
540
541    /// Internal helper to build a WordDictionary from a DictionaryConfig.
542    fn build_word_dictionary(
543        &self,
544        config: &DictionaryConfig,
545    ) -> Result<crate::WordDictionary, String> {
546        let mut builder = crate::WordDictionary::builder();
547
548        // Get words from inline list, file, or builtin
549        if let Some(ref words) = config.words {
550            builder = builder.words(words.clone());
551        } else if let Some(ref words_file) = config.words_file {
552            // Check for embedded word lists first (generated by build.rs)
553            let content = if let Some(embedded) = get_embedded_wordlist(words_file) {
554                embedded.to_string()
555            } else {
556                // Check for builtin word lists
557                match words_file.as_str() {
558                    "builtin:bip39" | "builtin:bip39-english" => {
559                        crate::wordlists::BIP39_ENGLISH.to_string()
560                    }
561                    "builtin:eff_long" | "builtin:eff-long" => {
562                        crate::wordlists::EFF_LONG.to_string()
563                    }
564                    "builtin:eff_short1" | "builtin:eff-short1" => {
565                        crate::wordlists::EFF_SHORT1.to_string()
566                    }
567                    "builtin:eff_short2" | "builtin:eff-short2" => {
568                        crate::wordlists::EFF_SHORT2.to_string()
569                    }
570                    "builtin:diceware" => crate::wordlists::DICEWARE.to_string(),
571                    "builtin:pgp_even" | "builtin:pgp-even" => {
572                        crate::wordlists::PGP_EVEN.to_string()
573                    }
574                    "builtin:pgp_odd" | "builtin:pgp-odd" => crate::wordlists::PGP_ODD.to_string(),
575                    "builtin:nato" => crate::wordlists::NATO.to_string(),
576                    "builtin:buzzwords" => crate::wordlists::BUZZWORDS.to_string(),
577                    "builtin:klingon" => crate::wordlists::KLINGON.to_string(),
578                    "builtin:pokemon" => crate::wordlists::POKEMON.to_string(),
579                    _ => {
580                        // Resolve path (support ~ expansion)
581                        let expanded = shellexpand::tilde(words_file);
582                        std::fs::read_to_string(expanded.as_ref()).map_err(|e| {
583                            format!("Failed to read words file '{}': {}", words_file, e)
584                        })?
585                    }
586                }
587            };
588            builder = builder.words_from_str(&content);
589        } else {
590            return Err("Word dictionary must have 'words' or 'words_file'".to_string());
591        }
592
593        // Set optional delimiter
594        if let Some(ref delimiter) = config.delimiter {
595            builder = builder.delimiter(delimiter.clone());
596        }
597
598        // Set case sensitivity
599        if let Some(case_sensitive) = config.case_sensitive {
600            builder = builder.case_sensitive(case_sensitive);
601        }
602
603        builder.build()
604    }
605
606    /// Builds an AlternatingWordDictionary from a named configuration.
607    ///
608    /// This is used for PGP-style biometric word lists where even/odd bytes
609    /// use different dictionaries.
610    ///
611    /// # Errors
612    ///
613    /// Returns error if:
614    /// - Dictionary not found
615    /// - Dictionary is not word-type
616    /// - Dictionary does not have alternating field set
617    /// - Any of the sub-dictionaries cannot be loaded
618    ///
619    /// # Example
620    /// ```ignore
621    /// # use base_d::DictionaryRegistry;
622    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
623    /// let registry = DictionaryRegistry::load_default()?;
624    /// let dict = registry.alternating_word_dictionary("pgp")?;
625    /// # Ok(())
626    /// # }
627    /// ```
628    pub fn alternating_word_dictionary(
629        &self,
630        name: &str,
631    ) -> Result<
632        crate::AlternatingWordDictionary,
633        crate::encoders::algorithms::errors::DictionaryNotFoundError,
634    > {
635        let config = self.get_dictionary(name).ok_or_else(|| {
636            crate::encoders::algorithms::errors::DictionaryNotFoundError::new(name)
637        })?;
638
639        // Verify it's a word dictionary
640        if config.dictionary_type != DictionaryType::Word {
641            return Err(
642                crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(
643                    name,
644                    format!(
645                        "Dictionary '{}' is not a word dictionary (type is {:?})",
646                        name, config.dictionary_type
647                    ),
648                ),
649            );
650        }
651
652        // Verify it has alternating field
653        let alternating_names = config.alternating.as_ref().ok_or_else(|| {
654            crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(
655                name,
656                format!(
657                    "Dictionary '{}' is not an alternating dictionary (missing 'alternating' field)",
658                    name
659                ),
660            )
661        })?;
662
663        self.build_alternating_word_dictionary(config, alternating_names)
664            .map_err(|e| {
665                crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(name, e)
666            })
667    }
668
669    /// Internal helper to build an AlternatingWordDictionary from a DictionaryConfig.
670    fn build_alternating_word_dictionary(
671        &self,
672        config: &DictionaryConfig,
673        alternating_names: &[String],
674    ) -> Result<crate::AlternatingWordDictionary, String> {
675        if alternating_names.is_empty() {
676            return Err("Alternating dictionary must have at least one sub-dictionary".to_string());
677        }
678
679        // Load all sub-dictionaries
680        let mut dictionaries = Vec::with_capacity(alternating_names.len());
681        for dict_name in alternating_names {
682            let sub_dict = self
683                .word_dictionary(dict_name)
684                .map_err(|e| format!("Failed to load sub-dictionary '{}': {}", dict_name, e))?;
685            dictionaries.push(sub_dict);
686        }
687
688        // Get delimiter from parent config
689        let delimiter = config.delimiter.clone().unwrap_or_else(|| " ".to_string());
690        // Note: case_sensitive is now handled by individual sub-dictionaries
691
692        Ok(crate::AlternatingWordDictionary::new(
693            dictionaries,
694            delimiter,
695        ))
696    }
697
698    /// Returns the dictionary type for a named dictionary.
699    ///
700    /// Returns `None` if the dictionary is not found.
701    pub fn dictionary_type(&self, name: &str) -> Option<DictionaryType> {
702        self.get_dictionary(name).map(|c| c.dictionary_type.clone())
703    }
704
705    /// Checks if a dictionary is word-based.
706    pub fn is_word_dictionary(&self, name: &str) -> bool {
707        self.dictionary_type(name) == Some(DictionaryType::Word)
708    }
709}
710
#[cfg(test)]
mod tests {
    use super::*;

    /// Loads the bundled registry, panicking when it cannot be built.
    fn builtin() -> DictionaryRegistry {
        DictionaryRegistry::load_default().unwrap()
    }

    /// Registry without any dictionaries, compression entries, or settings.
    fn empty_registry() -> DictionaryRegistry {
        DictionaryRegistry {
            dictionaries: HashMap::new(),
            compression: HashMap::new(),
            settings: Settings::default(),
        }
    }

    /// Config with explicit `chars` and every other field defaulted.
    fn with_chars(chars: &str) -> DictionaryConfig {
        DictionaryConfig {
            chars: chars.to_string(),
            ..Default::default()
        }
    }

    /// Config with explicit `chars` and the mode pinned to Radix.
    fn radix_with_chars(chars: &str) -> DictionaryConfig {
        DictionaryConfig {
            mode: Some(EncodingMode::Radix),
            ..with_chars(chars)
        }
    }

    /// Range-based config described by `start` + `length`.
    fn with_range(start: &str, length: usize) -> DictionaryConfig {
        DictionaryConfig {
            start: Some(start.to_string()),
            length: Some(length),
            ..Default::default()
        }
    }

    #[test]
    fn test_load_default_config() {
        assert!(builtin().dictionaries.contains_key("cards"));
    }

    #[test]
    fn test_cards_dictionary_length() {
        let registry = builtin();
        let cards = registry.get_dictionary("cards").unwrap();
        assert_eq!(cards.chars.chars().count(), 52);
    }

    #[test]
    fn test_base64_chunked_mode() {
        let registry = builtin();
        let base64 = registry.get_dictionary("base64").unwrap();
        assert_eq!(base64.effective_mode(), EncodingMode::Chunked);
        assert_eq!(base64.padding, Some("=".to_string()));
    }

    #[test]
    fn test_base64_radix_mode() {
        let registry = builtin();
        let base64_radix = registry.get_dictionary("base64_radix").unwrap();
        assert_eq!(base64_radix.effective_mode(), EncodingMode::Radix);
    }

    #[test]
    fn test_auto_detection_power_of_two() {
        // 4 = 2^2 → Chunked
        assert_eq!(with_chars("ABCD").effective_mode(), EncodingMode::Chunked);

        // 3 ≠ 2^n → Radix
        assert_eq!(with_chars("ABC").effective_mode(), EncodingMode::Radix);
    }

    #[test]
    fn test_explicit_mode_override() {
        // "ABCD" alone would auto-detect as Chunked; the explicit mode wins.
        let config = radix_with_chars("ABCD");
        assert_eq!(config.effective_mode(), EncodingMode::Radix);
    }

    #[test]
    fn test_merge_configs() {
        let mut base = empty_registry();
        base.dictionaries
            .insert("test1".to_string(), radix_with_chars("ABC"));

        let mut overrides = empty_registry();
        overrides
            .dictionaries
            .insert("test2".to_string(), radix_with_chars("XYZ"));
        overrides
            .dictionaries
            .insert("test1".to_string(), radix_with_chars("DEF"));

        base.merge(overrides);

        assert_eq!(base.dictionaries.len(), 2);
        assert_eq!(base.get_dictionary("test1").unwrap().chars, "DEF");
        assert_eq!(base.get_dictionary("test2").unwrap().chars, "XYZ");
    }

    #[test]
    fn test_load_from_toml_string() {
        let toml_content = r#"
[dictionaries.custom]
chars = "0123456789"
mode = "base_conversion"
"#;
        let registry = DictionaryRegistry::from_toml(toml_content).unwrap();
        assert!(registry.dictionaries.contains_key("custom"));
        assert_eq!(
            registry.get_dictionary("custom").unwrap().chars,
            "0123456789"
        );
    }

    #[test]
    fn test_effective_chars_from_explicit() {
        assert_eq!(with_chars("ABCD").effective_chars().unwrap(), "ABCD");
    }

    #[test]
    fn test_effective_chars_from_range() {
        assert_eq!(with_range("A", 4).effective_chars().unwrap(), "ABCD");
    }

    #[test]
    fn test_effective_chars_explicit_takes_priority() {
        // Explicit chars must win over start + length.
        let config = DictionaryConfig {
            chars: "XYZ".to_string(),
            ..with_range("A", 4)
        };
        assert_eq!(config.effective_chars().unwrap(), "XYZ");
    }

    #[test]
    fn test_effective_chars_unicode_range() {
        // Range anchored at Korean Hangul U+AC00.
        let generated = with_range("가", 4).effective_chars().unwrap();
        assert_eq!(generated.chars().count(), 4);
        assert_eq!(generated, "가각갂갃");
    }

    #[test]
    fn test_effective_chars_surrogate_gap_error() {
        // Starts just before the surrogates and is long enough to reach them.
        let config = with_range("\u{D700}", 512);
        assert!(config.effective_chars().is_err());
    }

    #[test]
    fn test_effective_chars_exceeds_unicode_max() {
        // Starts near the end of Unicode and runs past U+10FFFF.
        let config = with_range("\u{10FFFE}", 10);
        assert!(config.effective_chars().is_err());
    }

    #[test]
    fn test_effective_mode_with_length_field() {
        // With empty chars, auto-detection relies on the length field.
        // 64 = 2^6 → Chunked
        assert_eq!(with_range("A", 64).effective_mode(), EncodingMode::Chunked);

        // 52 ≠ 2^n → Radix
        assert_eq!(with_range("A", 52).effective_mode(), EncodingMode::Radix);
    }
}
911}