//! base_d/core/config.rs

1use serde::Deserialize;
2use std::collections::HashMap;
3
4// Include generated dictionary registry from build.rs
5include!(concat!(env!("OUT_DIR"), "/registry.rs"));
6
7/// Dictionary type: character-based or word-based.
8#[derive(Debug, Clone, Deserialize, PartialEq, Eq, Default)]
9#[serde(rename_all = "snake_case")]
10pub enum DictionaryType {
11    /// Character-based dictionary (traditional encoding)
12    #[default]
13    Char,
14    /// Word-based dictionary (BIP-39, Diceware, etc.)
15    Word,
16}
17
18/// Encoding strategy for converting binary data to text.
19///
20/// Different modes offer different tradeoffs between efficiency, compatibility,
21/// and features.
22#[derive(Debug, Clone, Deserialize, PartialEq, Eq)]
23#[serde(rename_all = "snake_case")]
24#[derive(Default)]
25pub enum EncodingMode {
26    /// True radix/base conversion treating data as a large number.
27    /// Works with any dictionary size. Output length varies with input.
28    /// Requires entire input before producing output (not streamable).
29    #[default]
30    #[serde(alias = "base_conversion")]
31    Radix,
32    /// Fixed-size bit chunking per RFC 4648.
33    /// Requires power-of-two dictionary size. Supports padding.
34    Chunked,
35    /// Direct 1:1 byte-to-character mapping using Unicode codepoint ranges.
36    /// Zero encoding overhead. Always 256 characters.
37    ByteRange,
38}
39
40/// Configuration for a single dictionary loaded from TOML.
41#[derive(Debug, Deserialize, Clone)]
42pub struct DictionaryConfig {
43    // === Type discriminant ===
44    /// Dictionary type: "char" (default) or "word"
45    #[serde(default, rename = "type")]
46    pub dictionary_type: DictionaryType,
47
48    // === Character-based fields ===
49    /// The characters comprising the dictionary (explicit list)
50    #[serde(default)]
51    pub chars: String,
52    /// Starting character for range-based dictionary definition
53    /// Use with `length` to define sequential Unicode ranges
54    #[serde(default)]
55    pub start: Option<String>,
56    /// Number of characters in range-based dictionary
57    /// Use with `start` to define sequential Unicode ranges
58    #[serde(default)]
59    pub length: Option<usize>,
60    /// Starting Unicode codepoint for ByteRange mode (256 chars)
61    #[serde(default)]
62    pub start_codepoint: Option<u32>,
63
64    // === Word-based fields ===
65    /// Inline word list for word-based dictionaries
66    #[serde(default)]
67    pub words: Option<Vec<String>>,
68    /// Path to external word list file (one word per line)
69    #[serde(default)]
70    pub words_file: Option<String>,
71    /// Delimiter between words in encoded output (default: " ")
72    #[serde(default)]
73    pub delimiter: Option<String>,
74    /// Whether word matching is case-sensitive (default: false)
75    #[serde(default)]
76    pub case_sensitive: Option<bool>,
77    /// Names of sub-dictionaries for alternating word encoding (e.g., ["pgp_even", "pgp_odd"])
78    #[serde(default)]
79    pub alternating: Option<Vec<String>>,
80
81    // === Common fields ===
82    /// The encoding mode to use (auto-detected if not specified)
83    #[serde(default)]
84    pub mode: Option<EncodingMode>,
85    /// Optional padding character (e.g., "=" for base64)
86    #[serde(default)]
87    pub padding: Option<String>,
88    /// Whether this dictionary renders consistently across platforms (default: true)
89    /// Dictionaries with common=false are excluded from random selection (--dejavu)
90    #[serde(default = "default_true")]
91    pub common: bool,
92}
93
94impl Default for DictionaryConfig {
95    fn default() -> Self {
96        Self {
97            dictionary_type: DictionaryType::default(),
98            chars: String::new(),
99            start: None,
100            length: None,
101            start_codepoint: None,
102            words: None,
103            words_file: None,
104            delimiter: None,
105            case_sensitive: None,
106            alternating: None,
107            mode: None,
108            padding: None,
109            common: true, // default to common for random selection
110        }
111    }
112}
113
114impl DictionaryConfig {
115    /// Returns the effective character set, generating from range if needed.
116    ///
117    /// Priority:
118    /// 1. If `chars` is non-empty, use it directly
119    /// 2. If `start` + `length` are set, generate sequential range
120    /// 3. Otherwise return empty string (ByteRange mode uses start_codepoint instead)
121    pub fn effective_chars(&self) -> Result<String, String> {
122        // Explicit chars take priority
123        if !self.chars.is_empty() {
124            return Ok(self.chars.clone());
125        }
126
127        // Generate from start + length range
128        if let (Some(start_str), Some(length)) = (&self.start, self.length) {
129            let start_char = start_str
130                .chars()
131                .next()
132                .ok_or("start must contain at least one character")?;
133            let start_codepoint = start_char as u32;
134
135            return Self::generate_range(start_codepoint, length);
136        }
137
138        // No chars defined - might be ByteRange mode
139        Ok(String::new())
140    }
141
142    /// Generate a string of sequential Unicode characters from a range.
143    fn generate_range(start: u32, length: usize) -> Result<String, String> {
144        const MAX_UNICODE: u32 = 0x10FFFF;
145        const SURROGATE_START: u32 = 0xD800;
146        const SURROGATE_END: u32 = 0xDFFF;
147
148        if length == 0 {
149            return Err("length must be greater than 0".to_string());
150        }
151
152        let end = start
153            .checked_add(length as u32 - 1)
154            .ok_or("range exceeds maximum Unicode codepoint")?;
155
156        if end > MAX_UNICODE {
157            return Err(format!(
158                "range end U+{:X} exceeds maximum Unicode codepoint U+{:X}",
159                end, MAX_UNICODE
160            ));
161        }
162
163        // Check for surrogate gap crossing
164        let crosses_surrogates = start <= SURROGATE_END && end >= SURROGATE_START;
165        if crosses_surrogates {
166            return Err(format!(
167                "range U+{:X}..U+{:X} crosses surrogate gap (U+D800..U+DFFF)",
168                start, end
169            ));
170        }
171
172        let mut result = String::with_capacity(length * 4); // UTF-8 worst case
173        for i in 0..length {
174            let codepoint = start + i as u32;
175            match char::from_u32(codepoint) {
176                Some(c) => result.push(c),
177                None => return Err(format!("invalid codepoint U+{:X}", codepoint)),
178            }
179        }
180
181        Ok(result)
182    }
183
184    /// Returns the effective encoding mode, auto-detecting if not explicitly set.
185    ///
186    /// Auto-detection rules:
187    /// - ByteRange: Must be explicitly set (requires start_codepoint)
188    /// - Chunked: If alphabet length is a power of 2
189    /// - Radix: Otherwise (true base conversion)
190    pub fn effective_mode(&self) -> EncodingMode {
191        if let Some(mode) = &self.mode {
192            return mode.clone();
193        }
194
195        // Auto-detect based on alphabet length
196        let len = if self.start_codepoint.is_some() {
197            // ByteRange must be explicit, but if someone sets start_codepoint
198            // without mode, assume they want ByteRange
199            return EncodingMode::ByteRange;
200        } else if let Some(length) = self.length {
201            // Range-based definition
202            length
203        } else {
204            self.chars.chars().count()
205        };
206
207        if len > 0 && len.is_power_of_two() {
208            EncodingMode::Chunked
209        } else {
210            EncodingMode::Radix
211        }
212    }
213}
214
/// Serde default provider for boolean fields that should be `true` when the
/// key is omitted (used by `DictionaryConfig::common`).
fn default_true() -> bool {
    true
}
218
219/// Collection of dictionary configurations loaded from TOML files.
220#[derive(Debug, Deserialize)]
221pub struct DictionaryRegistry {
222    /// Map of dictionary names to their configurations
223    pub dictionaries: HashMap<String, DictionaryConfig>,
224    /// Compression algorithm configurations
225    #[serde(default)]
226    pub compression: HashMap<String, CompressionConfig>,
227    /// Global settings
228    #[serde(default)]
229    pub settings: Settings,
230}
231
232/// Configuration for a compression algorithm.
233#[derive(Debug, Deserialize, Clone)]
234pub struct CompressionConfig {
235    /// Default compression level
236    pub default_level: u32,
237}
238
239/// xxHash-specific settings.
240#[derive(Debug, Deserialize, Clone, Default)]
241pub struct XxHashSettings {
242    /// Default seed for xxHash algorithms
243    #[serde(default)]
244    pub default_seed: u64,
245    /// Path to default secret file for XXH3 variants
246    #[serde(default)]
247    pub default_secret_file: Option<String>,
248}
249
250/// Global settings for base-d.
251#[derive(Debug, Deserialize, Clone, Default)]
252pub struct Settings {
253    /// Default dictionary - if not set, requires explicit -e or --dejavu
254    #[serde(default)]
255    pub default_dictionary: Option<String>,
256    /// xxHash configuration
257    #[serde(default)]
258    pub xxhash: XxHashSettings,
259}
260
261impl DictionaryRegistry {
262    /// Parses dictionary configurations from TOML content.
263    pub fn from_toml(content: &str) -> Result<Self, toml::de::Error> {
264        toml::from_str(content)
265    }
266
267    /// Loads the built-in dictionary configurations.
268    ///
269    /// Returns the default dictionaries bundled with the library.
270    pub fn load_default() -> Result<Self, Box<dyn std::error::Error>> {
271        Ok(Self {
272            dictionaries: build_registry(),
273            compression: HashMap::new(),
274            settings: Settings::default(),
275        })
276    }
277
278    /// Loads configuration from a custom file path.
279    pub fn load_from_file(path: &std::path::Path) -> Result<Self, Box<dyn std::error::Error>> {
280        let content = std::fs::read_to_string(path)?;
281        Ok(Self::from_toml(&content)?)
282    }
283
284    /// Loads configuration with user overrides from standard locations.
285    ///
286    /// Searches in priority order:
287    /// 1. Built-in dictionaries (from library)
288    /// 2. `~/.config/base-d/dictionaries.toml` (user overrides)
289    /// 3. `./dictionaries.toml` (project-local overrides)
290    ///
291    /// Later configurations override earlier ones for matching dictionary names.
292    pub fn load_with_overrides() -> Result<Self, Box<dyn std::error::Error>> {
293        let mut config = Self::load_default()?;
294
295        // Try to load user config from ~/.config/base-d/dictionaries.toml
296        if let Some(config_dir) = dirs::config_dir() {
297            let user_config_path = config_dir.join("base-d").join("dictionaries.toml");
298            if user_config_path.exists() {
299                match Self::load_from_file(&user_config_path) {
300                    Ok(user_config) => {
301                        config.merge(user_config);
302                    }
303                    Err(e) => {
304                        eprintln!(
305                            "Warning: Failed to load user config from {:?}: {}",
306                            user_config_path, e
307                        );
308                    }
309                }
310            }
311        }
312
313        // Try to load local config from ./dictionaries.toml
314        let local_config_path = std::path::Path::new("dictionaries.toml");
315        if local_config_path.exists() {
316            match Self::load_from_file(local_config_path) {
317                Ok(local_config) => {
318                    config.merge(local_config);
319                }
320                Err(e) => {
321                    eprintln!(
322                        "Warning: Failed to load local config from {:?}: {}",
323                        local_config_path, e
324                    );
325                }
326            }
327        }
328
329        Ok(config)
330    }
331
332    /// Merges another configuration into this one.
333    ///
334    /// Dictionaries from `other` override dictionaries with the same name in `self`.
335    pub fn merge(&mut self, other: DictionaryRegistry) {
336        for (name, dictionary) in other.dictionaries {
337            self.dictionaries.insert(name, dictionary);
338        }
339    }
340
341    /// Retrieves an dictionary configuration by name.
342    pub fn get_dictionary(&self, name: &str) -> Option<&DictionaryConfig> {
343        self.dictionaries.get(name)
344    }
345
346    /// Builds a ready-to-use Dictionary from a named configuration.
347    ///
348    /// This is a convenience method that handles the common pattern of:
349    /// 1. Looking up the dictionary config
350    /// 2. Getting effective chars
351    /// 3. Building the Dictionary with proper mode/padding
352    ///
353    /// # Example
354    /// ```
355    /// # use base_d::DictionaryRegistry;
356    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
357    /// let registry = DictionaryRegistry::load_default()?;
358    /// let dict = registry.dictionary("base64")?;
359    /// let encoded = base_d::encode(b"Hello", &dict);
360    /// # Ok(())
361    /// # }
362    /// ```
363    pub fn dictionary(
364        &self,
365        name: &str,
366    ) -> Result<crate::Dictionary, crate::encoders::algorithms::errors::DictionaryNotFoundError>
367    {
368        let config = self.get_dictionary(name).ok_or_else(|| {
369            crate::encoders::algorithms::errors::DictionaryNotFoundError::new(name)
370        })?;
371
372        self.build_dictionary(config).map_err(|e| {
373            crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(name, e)
374        })
375    }
376
377    /// Returns a random dictionary suitable for encoding.
378    ///
379    /// Only selects from dictionaries marked as `common = true` (the default).
380    /// These are dictionaries that render consistently across platforms.
381    ///
382    /// # Example
383    /// ```
384    /// # use base_d::DictionaryRegistry;
385    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
386    /// let registry = DictionaryRegistry::load_default()?;
387    /// let (name, dict) = registry.random()?;
388    /// let encoded = base_d::encode(b"Hello", &dict);
389    /// # Ok(())
390    /// # }
391    /// ```
392    pub fn random(&self) -> Result<(String, crate::Dictionary), Box<dyn std::error::Error>> {
393        use rand::seq::IteratorRandom;
394
395        let common_names: Vec<&String> = self
396            .dictionaries
397            .iter()
398            .filter(|(_, config)| {
399                // Only include common, character-based dictionaries
400                config.common && config.dictionary_type == DictionaryType::Char
401            })
402            .map(|(name, _)| name)
403            .collect();
404
405        let name = common_names
406            .into_iter()
407            .choose(&mut rand::rng())
408            .ok_or("No common dictionaries available")?;
409
410        let dict = self.dictionary(name)?;
411        Ok((name.clone(), dict))
412    }
413
414    /// Returns a list of all dictionary names.
415    pub fn names(&self) -> Vec<&str> {
416        self.dictionaries.keys().map(|s| s.as_str()).collect()
417    }
418
419    /// Returns a list of common dictionary names (suitable for random selection).
420    pub fn common_names(&self) -> Vec<&str> {
421        self.dictionaries
422            .iter()
423            .filter(|(_, config)| config.common)
424            .map(|(name, _)| name.as_str())
425            .collect()
426    }
427
428    /// Internal helper to build a Dictionary from a DictionaryConfig.
429    fn build_dictionary(&self, config: &DictionaryConfig) -> Result<crate::Dictionary, String> {
430        use crate::core::config::EncodingMode;
431
432        let mode = config.effective_mode();
433
434        // ByteRange mode uses start_codepoint, not chars
435        if mode == EncodingMode::ByteRange {
436            let start = config
437                .start_codepoint
438                .ok_or("ByteRange mode requires start_codepoint")?;
439            return crate::Dictionary::builder()
440                .mode(mode)
441                .start_codepoint(start)
442                .build();
443        }
444
445        // Get effective chars (handles both explicit and range-based)
446        let chars_str = config.effective_chars()?;
447        let chars: Vec<char> = chars_str.chars().collect();
448
449        // Build with optional padding
450        let mut builder = crate::Dictionary::builder().chars(chars).mode(mode);
451
452        if let Some(pad_str) = &config.padding
453            && let Some(pad_char) = pad_str.chars().next()
454        {
455            builder = builder.padding(pad_char);
456        }
457
458        builder.build()
459    }
460
461    /// Builds a WordDictionary from a named configuration.
462    ///
463    /// # Errors
464    ///
465    /// Returns error if:
466    /// - Dictionary not found
467    /// - Dictionary is not word-type
468    /// - Word list file cannot be read
469    /// - Word dictionary building fails
470    ///
471    /// # Example
472    /// ```
473    /// # use base_d::DictionaryRegistry;
474    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
475    /// let registry = DictionaryRegistry::load_default()?;
476    /// // Would work if bip39 is defined as a word dictionary
477    /// // let dict = registry.word_dictionary("bip39")?;
478    /// # Ok(())
479    /// # }
480    /// ```
481    pub fn word_dictionary(
482        &self,
483        name: &str,
484    ) -> Result<crate::WordDictionary, crate::encoders::algorithms::errors::DictionaryNotFoundError>
485    {
486        let config = self.get_dictionary(name).ok_or_else(|| {
487            crate::encoders::algorithms::errors::DictionaryNotFoundError::new(name)
488        })?;
489
490        // Verify it's a word dictionary
491        if config.dictionary_type != DictionaryType::Word {
492            return Err(
493                crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(
494                    name,
495                    format!(
496                        "Dictionary '{}' is not a word dictionary (type is {:?})",
497                        name, config.dictionary_type
498                    ),
499                ),
500            );
501        }
502
503        self.build_word_dictionary(config).map_err(|e| {
504            crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(name, e)
505        })
506    }
507
508    /// Internal helper to build a WordDictionary from a DictionaryConfig.
509    fn build_word_dictionary(
510        &self,
511        config: &DictionaryConfig,
512    ) -> Result<crate::WordDictionary, String> {
513        let mut builder = crate::WordDictionary::builder();
514
515        // Get words from inline list, file, or builtin
516        if let Some(ref words) = config.words {
517            builder = builder.words(words.clone());
518        } else if let Some(ref words_file) = config.words_file {
519            // Check for embedded word lists first (generated by build.rs)
520            let content = if let Some(embedded) = get_embedded_wordlist(words_file) {
521                embedded.to_string()
522            } else {
523                // Check for builtin word lists
524                match words_file.as_str() {
525                    "builtin:bip39" | "builtin:bip39-english" => {
526                        crate::wordlists::BIP39_ENGLISH.to_string()
527                    }
528                    "builtin:eff_long" | "builtin:eff-long" => {
529                        crate::wordlists::EFF_LONG.to_string()
530                    }
531                    "builtin:eff_short1" | "builtin:eff-short1" => {
532                        crate::wordlists::EFF_SHORT1.to_string()
533                    }
534                    "builtin:eff_short2" | "builtin:eff-short2" => {
535                        crate::wordlists::EFF_SHORT2.to_string()
536                    }
537                    "builtin:diceware" => crate::wordlists::DICEWARE.to_string(),
538                    "builtin:pgp_even" | "builtin:pgp-even" => {
539                        crate::wordlists::PGP_EVEN.to_string()
540                    }
541                    "builtin:pgp_odd" | "builtin:pgp-odd" => crate::wordlists::PGP_ODD.to_string(),
542                    "builtin:nato" => crate::wordlists::NATO.to_string(),
543                    "builtin:buzzwords" => crate::wordlists::BUZZWORDS.to_string(),
544                    "builtin:klingon" => crate::wordlists::KLINGON.to_string(),
545                    "builtin:pokemon" => crate::wordlists::POKEMON.to_string(),
546                    _ => {
547                        // Resolve path (support ~ expansion)
548                        let expanded = shellexpand::tilde(words_file);
549                        std::fs::read_to_string(expanded.as_ref()).map_err(|e| {
550                            format!("Failed to read words file '{}': {}", words_file, e)
551                        })?
552                    }
553                }
554            };
555            builder = builder.words_from_str(&content);
556        } else {
557            return Err("Word dictionary must have 'words' or 'words_file'".to_string());
558        }
559
560        // Set optional delimiter
561        if let Some(ref delimiter) = config.delimiter {
562            builder = builder.delimiter(delimiter.clone());
563        }
564
565        // Set case sensitivity
566        if let Some(case_sensitive) = config.case_sensitive {
567            builder = builder.case_sensitive(case_sensitive);
568        }
569
570        builder.build()
571    }
572
573    /// Builds an AlternatingWordDictionary from a named configuration.
574    ///
575    /// This is used for PGP-style biometric word lists where even/odd bytes
576    /// use different dictionaries.
577    ///
578    /// # Errors
579    ///
580    /// Returns error if:
581    /// - Dictionary not found
582    /// - Dictionary is not word-type
583    /// - Dictionary does not have alternating field set
584    /// - Any of the sub-dictionaries cannot be loaded
585    ///
586    /// # Example
587    /// ```ignore
588    /// # use base_d::DictionaryRegistry;
589    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
590    /// let registry = DictionaryRegistry::load_default()?;
591    /// let dict = registry.alternating_word_dictionary("pgp")?;
592    /// # Ok(())
593    /// # }
594    /// ```
595    pub fn alternating_word_dictionary(
596        &self,
597        name: &str,
598    ) -> Result<
599        crate::AlternatingWordDictionary,
600        crate::encoders::algorithms::errors::DictionaryNotFoundError,
601    > {
602        let config = self.get_dictionary(name).ok_or_else(|| {
603            crate::encoders::algorithms::errors::DictionaryNotFoundError::new(name)
604        })?;
605
606        // Verify it's a word dictionary
607        if config.dictionary_type != DictionaryType::Word {
608            return Err(
609                crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(
610                    name,
611                    format!(
612                        "Dictionary '{}' is not a word dictionary (type is {:?})",
613                        name, config.dictionary_type
614                    ),
615                ),
616            );
617        }
618
619        // Verify it has alternating field
620        let alternating_names = config.alternating.as_ref().ok_or_else(|| {
621            crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(
622                name,
623                format!(
624                    "Dictionary '{}' is not an alternating dictionary (missing 'alternating' field)",
625                    name
626                ),
627            )
628        })?;
629
630        self.build_alternating_word_dictionary(config, alternating_names)
631            .map_err(|e| {
632                crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(name, e)
633            })
634    }
635
636    /// Internal helper to build an AlternatingWordDictionary from a DictionaryConfig.
637    fn build_alternating_word_dictionary(
638        &self,
639        config: &DictionaryConfig,
640        alternating_names: &[String],
641    ) -> Result<crate::AlternatingWordDictionary, String> {
642        if alternating_names.is_empty() {
643            return Err("Alternating dictionary must have at least one sub-dictionary".to_string());
644        }
645
646        // Load all sub-dictionaries
647        let mut dictionaries = Vec::with_capacity(alternating_names.len());
648        for dict_name in alternating_names {
649            let sub_dict = self
650                .word_dictionary(dict_name)
651                .map_err(|e| format!("Failed to load sub-dictionary '{}': {}", dict_name, e))?;
652            dictionaries.push(sub_dict);
653        }
654
655        // Get delimiter from parent config
656        let delimiter = config.delimiter.clone().unwrap_or_else(|| " ".to_string());
657        // Note: case_sensitive is now handled by individual sub-dictionaries
658
659        Ok(crate::AlternatingWordDictionary::new(
660            dictionaries,
661            delimiter,
662        ))
663    }
664
665    /// Returns the dictionary type for a named dictionary.
666    ///
667    /// Returns `None` if the dictionary is not found.
668    pub fn dictionary_type(&self, name: &str) -> Option<DictionaryType> {
669        self.get_dictionary(name).map(|c| c.dictionary_type.clone())
670    }
671
672    /// Checks if a dictionary is word-based.
673    pub fn is_word_dictionary(&self, name: &str) -> bool {
674        self.dictionary_type(name) == Some(DictionaryType::Word)
675    }
676}
677
#[cfg(test)]
mod tests {
    use super::*;

    /// Loads the built-in registry, panicking if that fails.
    fn builtin() -> DictionaryRegistry {
        DictionaryRegistry::load_default().unwrap()
    }

    /// Registry with no dictionaries, no compression entries and default settings.
    fn empty_registry() -> DictionaryRegistry {
        DictionaryRegistry {
            dictionaries: HashMap::new(),
            compression: HashMap::new(),
            settings: Settings::default(),
        }
    }

    /// Config defined by an explicit character list.
    fn with_chars(chars: &str) -> DictionaryConfig {
        DictionaryConfig {
            chars: chars.to_string(),
            ..Default::default()
        }
    }

    /// Config defined by a `start` + `length` range.
    fn with_range(start: &str, length: usize) -> DictionaryConfig {
        DictionaryConfig {
            start: Some(start.to_string()),
            length: Some(length),
            ..Default::default()
        }
    }

    /// Config with explicit chars and an explicit Radix mode.
    fn radix_with_chars(chars: &str) -> DictionaryConfig {
        DictionaryConfig {
            chars: chars.to_string(),
            mode: Some(EncodingMode::Radix),
            ..Default::default()
        }
    }

    #[test]
    fn test_load_default_config() {
        let registry = builtin();
        assert!(registry.dictionaries.contains_key("cards"));
    }

    #[test]
    fn test_cards_dictionary_length() {
        let registry = builtin();
        let cards = registry.get_dictionary("cards").unwrap();
        assert_eq!(cards.chars.chars().count(), 52);
    }

    #[test]
    fn test_base64_chunked_mode() {
        let registry = builtin();
        let base64 = registry.get_dictionary("base64").unwrap();
        assert_eq!(base64.effective_mode(), EncodingMode::Chunked);
        assert_eq!(base64.padding, Some("=".to_string()));
    }

    #[test]
    fn test_base64_radix_mode() {
        let registry = builtin();
        let base64_radix = registry.get_dictionary("base64_radix").unwrap();
        assert_eq!(base64_radix.effective_mode(), EncodingMode::Radix);
    }

    #[test]
    fn test_auto_detection_power_of_two() {
        // 4 = 2^2 → Chunked
        assert_eq!(with_chars("ABCD").effective_mode(), EncodingMode::Chunked);

        // 3 ≠ 2^n → Radix
        assert_eq!(with_chars("ABC").effective_mode(), EncodingMode::Radix);
    }

    #[test]
    fn test_explicit_mode_override() {
        // "ABCD" alone would auto-detect as Chunked; the explicit Radix wins.
        let config = radix_with_chars("ABCD");
        assert_eq!(config.effective_mode(), EncodingMode::Radix);
    }

    #[test]
    fn test_merge_configs() {
        let mut base = empty_registry();
        base.dictionaries
            .insert("test1".to_string(), radix_with_chars("ABC"));

        let mut overrides = empty_registry();
        overrides
            .dictionaries
            .insert("test2".to_string(), radix_with_chars("XYZ"));
        overrides
            .dictionaries
            .insert("test1".to_string(), radix_with_chars("DEF"));

        base.merge(overrides);

        assert_eq!(base.dictionaries.len(), 2);
        assert_eq!(base.get_dictionary("test1").unwrap().chars, "DEF");
        assert_eq!(base.get_dictionary("test2").unwrap().chars, "XYZ");
    }

    #[test]
    fn test_load_from_toml_string() {
        let toml_content = r#"
[dictionaries.custom]
chars = "0123456789"
mode = "base_conversion"
"#;
        let registry = DictionaryRegistry::from_toml(toml_content).unwrap();
        assert!(registry.dictionaries.contains_key("custom"));
        assert_eq!(
            registry.get_dictionary("custom").unwrap().chars,
            "0123456789"
        );
    }

    #[test]
    fn test_effective_chars_from_explicit() {
        assert_eq!(with_chars("ABCD").effective_chars().unwrap(), "ABCD");
    }

    #[test]
    fn test_effective_chars_from_range() {
        assert_eq!(with_range("A", 4).effective_chars().unwrap(), "ABCD");
    }

    #[test]
    fn test_effective_chars_explicit_takes_priority() {
        // Explicit chars should override start+length
        let config = DictionaryConfig {
            chars: "XYZ".to_string(),
            ..with_range("A", 4)
        };
        assert_eq!(config.effective_chars().unwrap(), "XYZ");
    }

    #[test]
    fn test_effective_chars_unicode_range() {
        // Range starting from a non-ASCII character: Korean Hangul U+AC00
        let generated = with_range("가", 4).effective_chars().unwrap();
        assert_eq!(generated.chars().count(), 4);
        assert_eq!(generated, "가각갂갃");
    }

    #[test]
    fn test_effective_chars_surrogate_gap_error() {
        // Starts just before the surrogates and is long enough to run into them.
        let config = with_range("\u{D700}", 512);
        assert!(config.effective_chars().is_err());
    }

    #[test]
    fn test_effective_chars_exceeds_unicode_max() {
        // Starts near the end of Unicode and would run past U+10FFFF.
        let config = with_range("\u{10FFFE}", 10);
        assert!(config.effective_chars().is_err());
    }

    #[test]
    fn test_effective_mode_with_length_field() {
        // Auto-detect should use length field when chars is empty
        // 64 = 2^6 → Chunked
        assert_eq!(with_range("A", 64).effective_mode(), EncodingMode::Chunked);

        // 52 ≠ 2^n → Radix
        assert_eq!(with_range("A", 52).effective_mode(), EncodingMode::Radix);
    }
}