base_d/core/
config.rs

1use serde::Deserialize;
2use std::collections::HashMap;
3
4// Include generated dictionary registry from build.rs
5include!(concat!(env!("OUT_DIR"), "/registry.rs"));
6
/// Dictionary type: character-based or word-based.
///
/// Deserialized from the snake_case variant names (`"char"`, `"word"`).
/// `Char` is the `Default` variant.
#[derive(Debug, Clone, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum DictionaryType {
    /// Character-based dictionary (traditional encoding)
    #[default]
    Char,
    /// Word-based dictionary (BIP-39, Diceware, etc.)
    Word,
}
17
18/// Encoding strategy for converting binary data to text.
19///
20/// Different modes offer different tradeoffs between efficiency, compatibility,
21/// and features.
22#[derive(Debug, Clone, Deserialize, PartialEq, Eq)]
23#[serde(rename_all = "snake_case")]
24#[derive(Default)]
25pub enum EncodingMode {
26    /// True radix/base conversion treating data as a large number.
27    /// Works with any dictionary size. Output length varies with input.
28    /// Requires entire input before producing output (not streamable).
29    #[default]
30    #[serde(alias = "base_conversion")]
31    Radix,
32    /// Fixed-size bit chunking per RFC 4648.
33    /// Requires power-of-two dictionary size. Supports padding.
34    Chunked,
35    /// Direct 1:1 byte-to-character mapping using Unicode codepoint ranges.
36    /// Zero encoding overhead. Always 256 characters.
37    ByteRange,
38}
39
/// Configuration for a single dictionary loaded from TOML.
///
/// Every field except `common` falls back to its type's default when absent
/// from the TOML; `common` falls back to `true`.
#[derive(Debug, Deserialize, Clone)]
pub struct DictionaryConfig {
    // === Type discriminant ===
    /// Dictionary type: "char" (default) or "word"
    ///
    /// Read from the TOML key `type`.
    #[serde(default, rename = "type")]
    pub dictionary_type: DictionaryType,

    // === Character-based fields ===
    /// The characters comprising the dictionary (explicit list)
    ///
    /// When non-empty this takes priority over `start` + `length`.
    #[serde(default)]
    pub chars: String,
    /// Starting character for range-based dictionary definition
    /// Use with `length` to define sequential Unicode ranges
    ///
    /// Only the first character of the string is used.
    #[serde(default)]
    pub start: Option<String>,
    /// Number of characters in range-based dictionary
    /// Use with `start` to define sequential Unicode ranges
    #[serde(default)]
    pub length: Option<usize>,
    /// Starting Unicode codepoint for ByteRange mode (256 chars)
    ///
    /// If set while `mode` is absent, ByteRange mode is assumed.
    #[serde(default)]
    pub start_codepoint: Option<u32>,

    // === Word-based fields ===
    /// Inline word list for word-based dictionaries
    ///
    /// Takes priority over `words_file` when both are present.
    #[serde(default)]
    pub words: Option<Vec<String>>,
    /// Path to external word list file (one word per line)
    ///
    /// May also name an embedded list or a `builtin:<name>` list; a leading
    /// `~` in a real path is expanded.
    #[serde(default)]
    pub words_file: Option<String>,
    /// Delimiter between words in encoded output (default: " ")
    #[serde(default)]
    pub delimiter: Option<String>,
    /// Whether word matching is case-sensitive (default: false)
    #[serde(default)]
    pub case_sensitive: Option<bool>,

    // === Common fields ===
    /// The encoding mode to use (auto-detected if not specified)
    #[serde(default)]
    pub mode: Option<EncodingMode>,
    /// Optional padding character (e.g., "=" for base64)
    ///
    /// Only the first character of the string is used.
    #[serde(default)]
    pub padding: Option<String>,
    /// Whether this dictionary renders consistently across platforms (default: true)
    /// Dictionaries with common=false are excluded from random selection (--dejavu)
    #[serde(default = "default_true")]
    pub common: bool,
}
90
91impl Default for DictionaryConfig {
92    fn default() -> Self {
93        Self {
94            dictionary_type: DictionaryType::default(),
95            chars: String::new(),
96            start: None,
97            length: None,
98            start_codepoint: None,
99            words: None,
100            words_file: None,
101            delimiter: None,
102            case_sensitive: None,
103            mode: None,
104            padding: None,
105            common: true, // default to common for random selection
106        }
107    }
108}
109
110impl DictionaryConfig {
111    /// Returns the effective character set, generating from range if needed.
112    ///
113    /// Priority:
114    /// 1. If `chars` is non-empty, use it directly
115    /// 2. If `start` + `length` are set, generate sequential range
116    /// 3. Otherwise return empty string (ByteRange mode uses start_codepoint instead)
117    pub fn effective_chars(&self) -> Result<String, String> {
118        // Explicit chars take priority
119        if !self.chars.is_empty() {
120            return Ok(self.chars.clone());
121        }
122
123        // Generate from start + length range
124        if let (Some(start_str), Some(length)) = (&self.start, self.length) {
125            let start_char = start_str
126                .chars()
127                .next()
128                .ok_or("start must contain at least one character")?;
129            let start_codepoint = start_char as u32;
130
131            return Self::generate_range(start_codepoint, length);
132        }
133
134        // No chars defined - might be ByteRange mode
135        Ok(String::new())
136    }
137
138    /// Generate a string of sequential Unicode characters from a range.
139    fn generate_range(start: u32, length: usize) -> Result<String, String> {
140        const MAX_UNICODE: u32 = 0x10FFFF;
141        const SURROGATE_START: u32 = 0xD800;
142        const SURROGATE_END: u32 = 0xDFFF;
143
144        if length == 0 {
145            return Err("length must be greater than 0".to_string());
146        }
147
148        let end = start
149            .checked_add(length as u32 - 1)
150            .ok_or("range exceeds maximum Unicode codepoint")?;
151
152        if end > MAX_UNICODE {
153            return Err(format!(
154                "range end U+{:X} exceeds maximum Unicode codepoint U+{:X}",
155                end, MAX_UNICODE
156            ));
157        }
158
159        // Check for surrogate gap crossing
160        let crosses_surrogates = start <= SURROGATE_END && end >= SURROGATE_START;
161        if crosses_surrogates {
162            return Err(format!(
163                "range U+{:X}..U+{:X} crosses surrogate gap (U+D800..U+DFFF)",
164                start, end
165            ));
166        }
167
168        let mut result = String::with_capacity(length * 4); // UTF-8 worst case
169        for i in 0..length {
170            let codepoint = start + i as u32;
171            match char::from_u32(codepoint) {
172                Some(c) => result.push(c),
173                None => return Err(format!("invalid codepoint U+{:X}", codepoint)),
174            }
175        }
176
177        Ok(result)
178    }
179
180    /// Returns the effective encoding mode, auto-detecting if not explicitly set.
181    ///
182    /// Auto-detection rules:
183    /// - ByteRange: Must be explicitly set (requires start_codepoint)
184    /// - Chunked: If alphabet length is a power of 2
185    /// - Radix: Otherwise (true base conversion)
186    pub fn effective_mode(&self) -> EncodingMode {
187        if let Some(mode) = &self.mode {
188            return mode.clone();
189        }
190
191        // Auto-detect based on alphabet length
192        let len = if self.start_codepoint.is_some() {
193            // ByteRange must be explicit, but if someone sets start_codepoint
194            // without mode, assume they want ByteRange
195            return EncodingMode::ByteRange;
196        } else if let Some(length) = self.length {
197            // Range-based definition
198            length
199        } else {
200            self.chars.chars().count()
201        };
202
203        if len > 0 && len.is_power_of_two() {
204            EncodingMode::Chunked
205        } else {
206            EncodingMode::Radix
207        }
208    }
209}
210
/// Serde default provider returning `true`; referenced by
/// `#[serde(default = "default_true")]` on `DictionaryConfig::common`.
fn default_true() -> bool {
    true
}
214
/// Collection of dictionary configurations loaded from TOML files.
///
/// When deserializing, `dictionaries` is required, while `compression` and
/// `settings` fall back to their defaults if absent.
#[derive(Debug, Deserialize)]
pub struct DictionaryRegistry {
    /// Map of dictionary names to their configurations
    pub dictionaries: HashMap<String, DictionaryConfig>,
    /// Compression algorithm configurations
    #[serde(default)]
    pub compression: HashMap<String, CompressionConfig>,
    /// Global settings
    #[serde(default)]
    pub settings: Settings,
}
227
/// Configuration for a compression algorithm.
///
/// `default_level` has no serde default, so it must be present in the TOML.
#[derive(Debug, Deserialize, Clone)]
pub struct CompressionConfig {
    /// Default compression level
    pub default_level: u32,
}
234
/// xxHash-specific settings.
///
/// Both fields are optional in the TOML.
#[derive(Debug, Deserialize, Clone, Default)]
pub struct XxHashSettings {
    /// Default seed for xxHash algorithms
    /// (0 when not specified).
    #[serde(default)]
    pub default_seed: u64,
    /// Path to default secret file for XXH3 variants
    /// (`None` when not specified).
    #[serde(default)]
    pub default_secret_file: Option<String>,
}
245
/// Global settings for base-d.
///
/// Both fields are optional in the TOML and fall back to their defaults.
#[derive(Debug, Deserialize, Clone, Default)]
pub struct Settings {
    /// Default dictionary - if not set, requires explicit -e or --dejavu
    #[serde(default)]
    pub default_dictionary: Option<String>,
    /// xxHash configuration
    #[serde(default)]
    pub xxhash: XxHashSettings,
}
256
257impl DictionaryRegistry {
    /// Parses dictionary configurations from TOML content.
    ///
    /// # Errors
    ///
    /// Returns the `toml` deserialization error when `content` cannot be
    /// deserialized into a `DictionaryRegistry`.
    pub fn from_toml(content: &str) -> Result<Self, toml::de::Error> {
        toml::from_str(content)
    }
262
263    /// Loads the built-in dictionary configurations.
264    ///
265    /// Returns the default dictionaries bundled with the library.
266    pub fn load_default() -> Result<Self, Box<dyn std::error::Error>> {
267        Ok(Self {
268            dictionaries: build_registry(),
269            compression: HashMap::new(),
270            settings: Settings::default(),
271        })
272    }
273
274    /// Loads configuration from a custom file path.
275    pub fn load_from_file(path: &std::path::Path) -> Result<Self, Box<dyn std::error::Error>> {
276        let content = std::fs::read_to_string(path)?;
277        Ok(Self::from_toml(&content)?)
278    }
279
280    /// Loads configuration with user overrides from standard locations.
281    ///
282    /// Searches in priority order:
283    /// 1. Built-in dictionaries (from library)
284    /// 2. `~/.config/base-d/dictionaries.toml` (user overrides)
285    /// 3. `./dictionaries.toml` (project-local overrides)
286    ///
287    /// Later configurations override earlier ones for matching dictionary names.
288    pub fn load_with_overrides() -> Result<Self, Box<dyn std::error::Error>> {
289        let mut config = Self::load_default()?;
290
291        // Try to load user config from ~/.config/base-d/dictionaries.toml
292        if let Some(config_dir) = dirs::config_dir() {
293            let user_config_path = config_dir.join("base-d").join("dictionaries.toml");
294            if user_config_path.exists() {
295                match Self::load_from_file(&user_config_path) {
296                    Ok(user_config) => {
297                        config.merge(user_config);
298                    }
299                    Err(e) => {
300                        eprintln!(
301                            "Warning: Failed to load user config from {:?}: {}",
302                            user_config_path, e
303                        );
304                    }
305                }
306            }
307        }
308
309        // Try to load local config from ./dictionaries.toml
310        let local_config_path = std::path::Path::new("dictionaries.toml");
311        if local_config_path.exists() {
312            match Self::load_from_file(local_config_path) {
313                Ok(local_config) => {
314                    config.merge(local_config);
315                }
316                Err(e) => {
317                    eprintln!(
318                        "Warning: Failed to load local config from {:?}: {}",
319                        local_config_path, e
320                    );
321                }
322            }
323        }
324
325        Ok(config)
326    }
327
328    /// Merges another configuration into this one.
329    ///
330    /// Dictionaries from `other` override dictionaries with the same name in `self`.
331    pub fn merge(&mut self, other: DictionaryRegistry) {
332        for (name, dictionary) in other.dictionaries {
333            self.dictionaries.insert(name, dictionary);
334        }
335    }
336
    /// Retrieves a dictionary configuration by name.
    ///
    /// Returns `None` if no dictionary is registered under `name`.
    pub fn get_dictionary(&self, name: &str) -> Option<&DictionaryConfig> {
        self.dictionaries.get(name)
    }
341
342    /// Builds a ready-to-use Dictionary from a named configuration.
343    ///
344    /// This is a convenience method that handles the common pattern of:
345    /// 1. Looking up the dictionary config
346    /// 2. Getting effective chars
347    /// 3. Building the Dictionary with proper mode/padding
348    ///
349    /// # Example
350    /// ```
351    /// # use base_d::DictionaryRegistry;
352    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
353    /// let registry = DictionaryRegistry::load_default()?;
354    /// let dict = registry.dictionary("base64")?;
355    /// let encoded = base_d::encode(b"Hello", &dict);
356    /// # Ok(())
357    /// # }
358    /// ```
359    pub fn dictionary(
360        &self,
361        name: &str,
362    ) -> Result<crate::Dictionary, crate::encoders::algorithms::errors::DictionaryNotFoundError>
363    {
364        let config = self.get_dictionary(name).ok_or_else(|| {
365            crate::encoders::algorithms::errors::DictionaryNotFoundError::new(name)
366        })?;
367
368        self.build_dictionary(config).map_err(|e| {
369            crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(name, e)
370        })
371    }
372
373    /// Returns a random dictionary suitable for encoding.
374    ///
375    /// Only selects from dictionaries marked as `common = true` (the default).
376    /// These are dictionaries that render consistently across platforms.
377    ///
378    /// # Example
379    /// ```
380    /// # use base_d::DictionaryRegistry;
381    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
382    /// let registry = DictionaryRegistry::load_default()?;
383    /// let (name, dict) = registry.random()?;
384    /// let encoded = base_d::encode(b"Hello", &dict);
385    /// # Ok(())
386    /// # }
387    /// ```
388    pub fn random(&self) -> Result<(String, crate::Dictionary), Box<dyn std::error::Error>> {
389        use rand::seq::IteratorRandom;
390
391        let common_names: Vec<&String> = self
392            .dictionaries
393            .iter()
394            .filter(|(_, config)| {
395                // Only include common, character-based dictionaries
396                config.common && config.dictionary_type == DictionaryType::Char
397            })
398            .map(|(name, _)| name)
399            .collect();
400
401        let name = common_names
402            .into_iter()
403            .choose(&mut rand::rng())
404            .ok_or("No common dictionaries available")?;
405
406        let dict = self.dictionary(name)?;
407        Ok((name.clone(), dict))
408    }
409
410    /// Returns a list of all dictionary names.
411    pub fn names(&self) -> Vec<&str> {
412        self.dictionaries.keys().map(|s| s.as_str()).collect()
413    }
414
415    /// Returns a list of common dictionary names (suitable for random selection).
416    pub fn common_names(&self) -> Vec<&str> {
417        self.dictionaries
418            .iter()
419            .filter(|(_, config)| config.common)
420            .map(|(name, _)| name.as_str())
421            .collect()
422    }
423
424    /// Internal helper to build a Dictionary from a DictionaryConfig.
425    fn build_dictionary(&self, config: &DictionaryConfig) -> Result<crate::Dictionary, String> {
426        use crate::core::config::EncodingMode;
427
428        let mode = config.effective_mode();
429
430        // ByteRange mode uses start_codepoint, not chars
431        if mode == EncodingMode::ByteRange {
432            let start = config
433                .start_codepoint
434                .ok_or("ByteRange mode requires start_codepoint")?;
435            return crate::Dictionary::builder()
436                .mode(mode)
437                .start_codepoint(start)
438                .build();
439        }
440
441        // Get effective chars (handles both explicit and range-based)
442        let chars_str = config.effective_chars()?;
443        let chars: Vec<char> = chars_str.chars().collect();
444
445        // Build with optional padding
446        let mut builder = crate::Dictionary::builder().chars(chars).mode(mode);
447
448        if let Some(pad_str) = &config.padding
449            && let Some(pad_char) = pad_str.chars().next()
450        {
451            builder = builder.padding(pad_char);
452        }
453
454        builder.build()
455    }
456
457    /// Builds a WordDictionary from a named configuration.
458    ///
459    /// # Errors
460    ///
461    /// Returns error if:
462    /// - Dictionary not found
463    /// - Dictionary is not word-type
464    /// - Word list file cannot be read
465    /// - Word dictionary building fails
466    ///
467    /// # Example
468    /// ```
469    /// # use base_d::DictionaryRegistry;
470    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
471    /// let registry = DictionaryRegistry::load_default()?;
472    /// // Would work if bip39 is defined as a word dictionary
473    /// // let dict = registry.word_dictionary("bip39")?;
474    /// # Ok(())
475    /// # }
476    /// ```
477    pub fn word_dictionary(
478        &self,
479        name: &str,
480    ) -> Result<crate::WordDictionary, crate::encoders::algorithms::errors::DictionaryNotFoundError>
481    {
482        let config = self.get_dictionary(name).ok_or_else(|| {
483            crate::encoders::algorithms::errors::DictionaryNotFoundError::new(name)
484        })?;
485
486        // Verify it's a word dictionary
487        if config.dictionary_type != DictionaryType::Word {
488            return Err(
489                crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(
490                    name,
491                    format!(
492                        "Dictionary '{}' is not a word dictionary (type is {:?})",
493                        name, config.dictionary_type
494                    ),
495                ),
496            );
497        }
498
499        self.build_word_dictionary(config).map_err(|e| {
500            crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(name, e)
501        })
502    }
503
504    /// Internal helper to build a WordDictionary from a DictionaryConfig.
505    fn build_word_dictionary(
506        &self,
507        config: &DictionaryConfig,
508    ) -> Result<crate::WordDictionary, String> {
509        let mut builder = crate::WordDictionary::builder();
510
511        // Get words from inline list, file, or builtin
512        if let Some(ref words) = config.words {
513            builder = builder.words(words.clone());
514        } else if let Some(ref words_file) = config.words_file {
515            // Check for embedded word lists first (generated by build.rs)
516            let content = if let Some(embedded) = get_embedded_wordlist(words_file) {
517                embedded.to_string()
518            } else {
519                // Check for builtin word lists
520                match words_file.as_str() {
521                    "builtin:bip39" | "builtin:bip39-english" => {
522                        crate::wordlists::BIP39_ENGLISH.to_string()
523                    }
524                    "builtin:eff_long" | "builtin:eff-long" => {
525                        crate::wordlists::EFF_LONG.to_string()
526                    }
527                    "builtin:eff_short1" | "builtin:eff-short1" => {
528                        crate::wordlists::EFF_SHORT1.to_string()
529                    }
530                    "builtin:eff_short2" | "builtin:eff-short2" => {
531                        crate::wordlists::EFF_SHORT2.to_string()
532                    }
533                    "builtin:diceware" => crate::wordlists::DICEWARE.to_string(),
534                    "builtin:pgp_even" | "builtin:pgp-even" => {
535                        crate::wordlists::PGP_EVEN.to_string()
536                    }
537                    "builtin:pgp_odd" | "builtin:pgp-odd" => crate::wordlists::PGP_ODD.to_string(),
538                    "builtin:nato" => crate::wordlists::NATO.to_string(),
539                    "builtin:buzzwords" => crate::wordlists::BUZZWORDS.to_string(),
540                    "builtin:klingon" => crate::wordlists::KLINGON.to_string(),
541                    "builtin:pokemon" => crate::wordlists::POKEMON.to_string(),
542                    _ => {
543                        // Resolve path (support ~ expansion)
544                        let expanded = shellexpand::tilde(words_file);
545                        std::fs::read_to_string(expanded.as_ref()).map_err(|e| {
546                            format!("Failed to read words file '{}': {}", words_file, e)
547                        })?
548                    }
549                }
550            };
551            builder = builder.words_from_str(&content);
552        } else {
553            return Err("Word dictionary must have 'words' or 'words_file'".to_string());
554        }
555
556        // Set optional delimiter
557        if let Some(ref delimiter) = config.delimiter {
558            builder = builder.delimiter(delimiter.clone());
559        }
560
561        // Set case sensitivity
562        if let Some(case_sensitive) = config.case_sensitive {
563            builder = builder.case_sensitive(case_sensitive);
564        }
565
566        builder.build()
567    }
568
569    /// Returns the dictionary type for a named dictionary.
570    ///
571    /// Returns `None` if the dictionary is not found.
572    pub fn dictionary_type(&self, name: &str) -> Option<DictionaryType> {
573        self.get_dictionary(name).map(|c| c.dictionary_type.clone())
574    }
575
576    /// Checks if a dictionary is word-based.
577    pub fn is_word_dictionary(&self, name: &str) -> bool {
578        self.dictionary_type(name) == Some(DictionaryType::Word)
579    }
580}
581
// Unit tests for configuration loading, mode auto-detection, registry merging
// and range-based alphabet generation.
#[cfg(test)]
mod tests {
    use super::*;

    // The built-in registry is expected to contain the "cards" dictionary.
    #[test]
    fn test_load_default_config() {
        let config = DictionaryRegistry::load_default().unwrap();
        assert!(config.dictionaries.contains_key("cards"));
    }

    // "cards" is expected to define exactly 52 characters via `chars`.
    #[test]
    fn test_cards_dictionary_length() {
        let config = DictionaryRegistry::load_default().unwrap();
        let cards = config.get_dictionary("cards").unwrap();
        assert_eq!(cards.chars.chars().count(), 52);
    }

    // Built-in "base64" must resolve to Chunked mode with "=" padding.
    #[test]
    fn test_base64_chunked_mode() {
        let config = DictionaryRegistry::load_default().unwrap();
        let base64 = config.get_dictionary("base64").unwrap();
        assert_eq!(base64.effective_mode(), EncodingMode::Chunked);
        assert_eq!(base64.padding, Some("=".to_string()));
    }

    // Built-in "base64_radix" must resolve to Radix mode.
    #[test]
    fn test_base64_radix_mode() {
        let config = DictionaryRegistry::load_default().unwrap();
        let base64_radix = config.get_dictionary("base64_radix").unwrap();
        assert_eq!(base64_radix.effective_mode(), EncodingMode::Radix);
    }

    // Without an explicit mode, the alphabet size selects the mode.
    #[test]
    fn test_auto_detection_power_of_two() {
        // Power of 2 → Chunked
        let config = DictionaryConfig {
            chars: "ABCD".to_string(), // 4 = 2^2
            ..Default::default()
        };
        assert_eq!(config.effective_mode(), EncodingMode::Chunked);

        // Not power of 2 → Radix
        let config = DictionaryConfig {
            chars: "ABC".to_string(), // 3 ≠ 2^n
            ..Default::default()
        };
        assert_eq!(config.effective_mode(), EncodingMode::Radix);
    }

    #[test]
    fn test_explicit_mode_override() {
        // Explicit mode overrides auto-detection
        let config = DictionaryConfig {
            chars: "ABCD".to_string(),       // Would be Chunked
            mode: Some(EncodingMode::Radix), // But explicitly set to Radix
            ..Default::default()
        };
        assert_eq!(config.effective_mode(), EncodingMode::Radix);
    }

    // Merging keeps entries unique to either side and lets the merged-in
    // registry win on name collisions ("test1" ends up as "DEF").
    #[test]
    fn test_merge_configs() {
        let mut config1 = DictionaryRegistry {
            dictionaries: HashMap::new(),
            compression: HashMap::new(),
            settings: Settings::default(),
        };
        config1.dictionaries.insert(
            "test1".to_string(),
            DictionaryConfig {
                chars: "ABC".to_string(),
                mode: Some(EncodingMode::Radix),
                ..Default::default()
            },
        );

        let mut config2 = DictionaryRegistry {
            dictionaries: HashMap::new(),
            compression: HashMap::new(),
            settings: Settings::default(),
        };
        config2.dictionaries.insert(
            "test2".to_string(),
            DictionaryConfig {
                chars: "XYZ".to_string(),
                mode: Some(EncodingMode::Radix),
                ..Default::default()
            },
        );
        config2.dictionaries.insert(
            "test1".to_string(),
            DictionaryConfig {
                chars: "DEF".to_string(),
                mode: Some(EncodingMode::Radix),
                ..Default::default()
            },
        );

        config1.merge(config2);

        assert_eq!(config1.dictionaries.len(), 2);
        assert_eq!(config1.get_dictionary("test1").unwrap().chars, "DEF");
        assert_eq!(config1.get_dictionary("test2").unwrap().chars, "XYZ");
    }

    // Parsing from a TOML string; also exercises the "base_conversion" alias
    // for the Radix mode.
    #[test]
    fn test_load_from_toml_string() {
        let toml_content = r#"
[dictionaries.custom]
chars = "0123456789"
mode = "base_conversion"
"#;
        let config = DictionaryRegistry::from_toml(toml_content).unwrap();
        assert!(config.dictionaries.contains_key("custom"));
        assert_eq!(config.get_dictionary("custom").unwrap().chars, "0123456789");
    }

    // Explicit `chars` are returned unchanged.
    #[test]
    fn test_effective_chars_from_explicit() {
        let config = DictionaryConfig {
            chars: "ABCD".to_string(),
            ..Default::default()
        };
        assert_eq!(config.effective_chars().unwrap(), "ABCD");
    }

    // `start` + `length` expand to consecutive codepoints.
    #[test]
    fn test_effective_chars_from_range() {
        let config = DictionaryConfig {
            start: Some("A".to_string()),
            length: Some(4),
            ..Default::default()
        };
        assert_eq!(config.effective_chars().unwrap(), "ABCD");
    }

    #[test]
    fn test_effective_chars_explicit_takes_priority() {
        // Explicit chars should override start+length
        let config = DictionaryConfig {
            chars: "XYZ".to_string(),
            start: Some("A".to_string()),
            length: Some(4),
            ..Default::default()
        };
        assert_eq!(config.effective_chars().unwrap(), "XYZ");
    }

    #[test]
    fn test_effective_chars_unicode_range() {
        // Test generating a range starting from a Unicode character
        let config = DictionaryConfig {
            start: Some("가".to_string()), // Korean Hangul U+AC00
            length: Some(4),
            ..Default::default()
        };
        let result = config.effective_chars().unwrap();
        assert_eq!(result.chars().count(), 4);
        assert_eq!(result, "가각갂갃");
    }

    #[test]
    fn test_effective_chars_surrogate_gap_error() {
        // Range crossing surrogate gap should error
        let config = DictionaryConfig {
            start: Some("\u{D700}".to_string()), // Just before surrogates
            length: Some(512),                   // Would cross into surrogate range
            ..Default::default()
        };
        assert!(config.effective_chars().is_err());
    }

    #[test]
    fn test_effective_chars_exceeds_unicode_max() {
        // Range exceeding max Unicode should error
        let config = DictionaryConfig {
            start: Some("\u{10FFFE}".to_string()), // Near end of Unicode
            length: Some(10),                      // Would exceed U+10FFFF
            ..Default::default()
        };
        assert!(config.effective_chars().is_err());
    }

    #[test]
    fn test_effective_mode_with_length_field() {
        // Auto-detect should use length field when chars is empty
        let config = DictionaryConfig {
            start: Some("A".to_string()),
            length: Some(64), // 64 = 2^6 → Chunked
            ..Default::default()
        };
        assert_eq!(config.effective_mode(), EncodingMode::Chunked);

        let config = DictionaryConfig {
            start: Some("A".to_string()),
            length: Some(52), // 52 ≠ 2^n → Radix
            ..Default::default()
        };
        assert_eq!(config.effective_mode(), EncodingMode::Radix);
    }
}
782}