base_d/core/
config.rs

1use serde::Deserialize;
2use std::collections::HashMap;
3
4/// Dictionary type: character-based or word-based.
5#[derive(Debug, Clone, Deserialize, PartialEq, Eq, Default)]
6#[serde(rename_all = "snake_case")]
7pub enum DictionaryType {
8    /// Character-based dictionary (traditional encoding)
9    #[default]
10    Char,
11    /// Word-based dictionary (BIP-39, Diceware, etc.)
12    Word,
13}
14
15/// Encoding strategy for converting binary data to text.
16///
17/// Different modes offer different tradeoffs between efficiency, compatibility,
18/// and features.
19#[derive(Debug, Clone, Deserialize, PartialEq, Eq)]
20#[serde(rename_all = "snake_case")]
21#[derive(Default)]
22pub enum EncodingMode {
23    /// True radix/base conversion treating data as a large number.
24    /// Works with any dictionary size. Output length varies with input.
25    /// Requires entire input before producing output (not streamable).
26    #[default]
27    #[serde(alias = "base_conversion")]
28    Radix,
29    /// Fixed-size bit chunking per RFC 4648.
30    /// Requires power-of-two dictionary size. Supports padding.
31    Chunked,
32    /// Direct 1:1 byte-to-character mapping using Unicode codepoint ranges.
33    /// Zero encoding overhead. Always 256 characters.
34    ByteRange,
35}
36
37/// Configuration for a single dictionary loaded from TOML.
38#[derive(Debug, Deserialize, Clone)]
39pub struct DictionaryConfig {
40    // === Type discriminant ===
41    /// Dictionary type: "char" (default) or "word"
42    #[serde(default, rename = "type")]
43    pub dictionary_type: DictionaryType,
44
45    // === Character-based fields ===
46    /// The characters comprising the dictionary (explicit list)
47    #[serde(default)]
48    pub chars: String,
49    /// Starting character for range-based dictionary definition
50    /// Use with `length` to define sequential Unicode ranges
51    #[serde(default)]
52    pub start: Option<String>,
53    /// Number of characters in range-based dictionary
54    /// Use with `start` to define sequential Unicode ranges
55    #[serde(default)]
56    pub length: Option<usize>,
57    /// Starting Unicode codepoint for ByteRange mode (256 chars)
58    #[serde(default)]
59    pub start_codepoint: Option<u32>,
60
61    // === Word-based fields ===
62    /// Inline word list for word-based dictionaries
63    #[serde(default)]
64    pub words: Option<Vec<String>>,
65    /// Path to external word list file (one word per line)
66    #[serde(default)]
67    pub words_file: Option<String>,
68    /// Delimiter between words in encoded output (default: " ")
69    #[serde(default)]
70    pub delimiter: Option<String>,
71    /// Whether word matching is case-sensitive (default: false)
72    #[serde(default)]
73    pub case_sensitive: Option<bool>,
74
75    // === Common fields ===
76    /// The encoding mode to use (auto-detected if not specified)
77    #[serde(default)]
78    pub mode: Option<EncodingMode>,
79    /// Optional padding character (e.g., "=" for base64)
80    #[serde(default)]
81    pub padding: Option<String>,
82    /// Whether this dictionary renders consistently across platforms (default: true)
83    /// Dictionaries with common=false are excluded from random selection (--dejavu)
84    #[serde(default = "default_true")]
85    pub common: bool,
86}
87
88impl Default for DictionaryConfig {
89    fn default() -> Self {
90        Self {
91            dictionary_type: DictionaryType::default(),
92            chars: String::new(),
93            start: None,
94            length: None,
95            start_codepoint: None,
96            words: None,
97            words_file: None,
98            delimiter: None,
99            case_sensitive: None,
100            mode: None,
101            padding: None,
102            common: true, // default to common for random selection
103        }
104    }
105}
106
107impl DictionaryConfig {
108    /// Returns the effective character set, generating from range if needed.
109    ///
110    /// Priority:
111    /// 1. If `chars` is non-empty, use it directly
112    /// 2. If `start` + `length` are set, generate sequential range
113    /// 3. Otherwise return empty string (ByteRange mode uses start_codepoint instead)
114    pub fn effective_chars(&self) -> Result<String, String> {
115        // Explicit chars take priority
116        if !self.chars.is_empty() {
117            return Ok(self.chars.clone());
118        }
119
120        // Generate from start + length range
121        if let (Some(start_str), Some(length)) = (&self.start, self.length) {
122            let start_char = start_str
123                .chars()
124                .next()
125                .ok_or("start must contain at least one character")?;
126            let start_codepoint = start_char as u32;
127
128            return Self::generate_range(start_codepoint, length);
129        }
130
131        // No chars defined - might be ByteRange mode
132        Ok(String::new())
133    }
134
135    /// Generate a string of sequential Unicode characters from a range.
136    fn generate_range(start: u32, length: usize) -> Result<String, String> {
137        const MAX_UNICODE: u32 = 0x10FFFF;
138        const SURROGATE_START: u32 = 0xD800;
139        const SURROGATE_END: u32 = 0xDFFF;
140
141        if length == 0 {
142            return Err("length must be greater than 0".to_string());
143        }
144
145        let end = start
146            .checked_add(length as u32 - 1)
147            .ok_or("range exceeds maximum Unicode codepoint")?;
148
149        if end > MAX_UNICODE {
150            return Err(format!(
151                "range end U+{:X} exceeds maximum Unicode codepoint U+{:X}",
152                end, MAX_UNICODE
153            ));
154        }
155
156        // Check for surrogate gap crossing
157        let crosses_surrogates = start <= SURROGATE_END && end >= SURROGATE_START;
158        if crosses_surrogates {
159            return Err(format!(
160                "range U+{:X}..U+{:X} crosses surrogate gap (U+D800..U+DFFF)",
161                start, end
162            ));
163        }
164
165        let mut result = String::with_capacity(length * 4); // UTF-8 worst case
166        for i in 0..length {
167            let codepoint = start + i as u32;
168            match char::from_u32(codepoint) {
169                Some(c) => result.push(c),
170                None => return Err(format!("invalid codepoint U+{:X}", codepoint)),
171            }
172        }
173
174        Ok(result)
175    }
176
177    /// Returns the effective encoding mode, auto-detecting if not explicitly set.
178    ///
179    /// Auto-detection rules:
180    /// - ByteRange: Must be explicitly set (requires start_codepoint)
181    /// - Chunked: If alphabet length is a power of 2
182    /// - Radix: Otherwise (true base conversion)
183    pub fn effective_mode(&self) -> EncodingMode {
184        if let Some(mode) = &self.mode {
185            return mode.clone();
186        }
187
188        // Auto-detect based on alphabet length
189        let len = if self.start_codepoint.is_some() {
190            // ByteRange must be explicit, but if someone sets start_codepoint
191            // without mode, assume they want ByteRange
192            return EncodingMode::ByteRange;
193        } else if let Some(length) = self.length {
194            // Range-based definition
195            length
196        } else {
197            self.chars.chars().count()
198        };
199
200        if len > 0 && len.is_power_of_two() {
201            EncodingMode::Chunked
202        } else {
203            EncodingMode::Radix
204        }
205    }
206}
207
/// Serde default provider for boolean fields that should start out `true`.
fn default_true() -> bool {
    true
}
211
212/// Collection of dictionary configurations loaded from TOML files.
213#[derive(Debug, Deserialize)]
214pub struct DictionaryRegistry {
215    /// Map of dictionary names to their configurations
216    pub dictionaries: HashMap<String, DictionaryConfig>,
217    /// Compression algorithm configurations
218    #[serde(default)]
219    pub compression: HashMap<String, CompressionConfig>,
220    /// Global settings
221    #[serde(default)]
222    pub settings: Settings,
223}
224
225/// Configuration for a compression algorithm.
226#[derive(Debug, Deserialize, Clone)]
227pub struct CompressionConfig {
228    /// Default compression level
229    pub default_level: u32,
230}
231
232/// xxHash-specific settings.
233#[derive(Debug, Deserialize, Clone, Default)]
234pub struct XxHashSettings {
235    /// Default seed for xxHash algorithms
236    #[serde(default)]
237    pub default_seed: u64,
238    /// Path to default secret file for XXH3 variants
239    #[serde(default)]
240    pub default_secret_file: Option<String>,
241}
242
243/// Global settings for base-d.
244#[derive(Debug, Deserialize, Clone, Default)]
245pub struct Settings {
246    /// Default dictionary - if not set, requires explicit -e or --dejavu
247    #[serde(default)]
248    pub default_dictionary: Option<String>,
249    /// xxHash configuration
250    #[serde(default)]
251    pub xxhash: XxHashSettings,
252}
253
254impl DictionaryRegistry {
255    /// Parses dictionary configurations from TOML content.
256    pub fn from_toml(content: &str) -> Result<Self, toml::de::Error> {
257        toml::from_str(content)
258    }
259
260    /// Loads the built-in dictionary configurations.
261    ///
262    /// Returns the default dictionaries bundled with the library.
263    pub fn load_default() -> Result<Self, Box<dyn std::error::Error>> {
264        let content = include_str!("../../dictionaries.toml");
265        Ok(Self::from_toml(content)?)
266    }
267
268    /// Loads configuration from a custom file path.
269    pub fn load_from_file(path: &std::path::Path) -> Result<Self, Box<dyn std::error::Error>> {
270        let content = std::fs::read_to_string(path)?;
271        Ok(Self::from_toml(&content)?)
272    }
273
274    /// Loads configuration with user overrides from standard locations.
275    ///
276    /// Searches in priority order:
277    /// 1. Built-in dictionaries (from library)
278    /// 2. `~/.config/base-d/dictionaries.toml` (user overrides)
279    /// 3. `./dictionaries.toml` (project-local overrides)
280    ///
281    /// Later configurations override earlier ones for matching dictionary names.
282    pub fn load_with_overrides() -> Result<Self, Box<dyn std::error::Error>> {
283        let mut config = Self::load_default()?;
284
285        // Try to load user config from ~/.config/base-d/dictionaries.toml
286        if let Some(config_dir) = dirs::config_dir() {
287            let user_config_path = config_dir.join("base-d").join("dictionaries.toml");
288            if user_config_path.exists() {
289                match Self::load_from_file(&user_config_path) {
290                    Ok(user_config) => {
291                        config.merge(user_config);
292                    }
293                    Err(e) => {
294                        eprintln!(
295                            "Warning: Failed to load user config from {:?}: {}",
296                            user_config_path, e
297                        );
298                    }
299                }
300            }
301        }
302
303        // Try to load local config from ./dictionaries.toml
304        let local_config_path = std::path::Path::new("dictionaries.toml");
305        if local_config_path.exists() {
306            match Self::load_from_file(local_config_path) {
307                Ok(local_config) => {
308                    config.merge(local_config);
309                }
310                Err(e) => {
311                    eprintln!(
312                        "Warning: Failed to load local config from {:?}: {}",
313                        local_config_path, e
314                    );
315                }
316            }
317        }
318
319        Ok(config)
320    }
321
322    /// Merges another configuration into this one.
323    ///
324    /// Dictionaries from `other` override dictionaries with the same name in `self`.
325    pub fn merge(&mut self, other: DictionaryRegistry) {
326        for (name, dictionary) in other.dictionaries {
327            self.dictionaries.insert(name, dictionary);
328        }
329    }
330
331    /// Retrieves an dictionary configuration by name.
332    pub fn get_dictionary(&self, name: &str) -> Option<&DictionaryConfig> {
333        self.dictionaries.get(name)
334    }
335
336    /// Builds a ready-to-use Dictionary from a named configuration.
337    ///
338    /// This is a convenience method that handles the common pattern of:
339    /// 1. Looking up the dictionary config
340    /// 2. Getting effective chars
341    /// 3. Building the Dictionary with proper mode/padding
342    ///
343    /// # Example
344    /// ```
345    /// # use base_d::DictionaryRegistry;
346    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
347    /// let registry = DictionaryRegistry::load_default()?;
348    /// let dict = registry.dictionary("base64")?;
349    /// let encoded = base_d::encode(b"Hello", &dict);
350    /// # Ok(())
351    /// # }
352    /// ```
353    pub fn dictionary(
354        &self,
355        name: &str,
356    ) -> Result<crate::Dictionary, crate::encoders::algorithms::errors::DictionaryNotFoundError>
357    {
358        let config = self.get_dictionary(name).ok_or_else(|| {
359            crate::encoders::algorithms::errors::DictionaryNotFoundError::new(name)
360        })?;
361
362        self.build_dictionary(config).map_err(|e| {
363            crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(name, e)
364        })
365    }
366
367    /// Returns a random dictionary suitable for encoding.
368    ///
369    /// Only selects from dictionaries marked as `common = true` (the default).
370    /// These are dictionaries that render consistently across platforms.
371    ///
372    /// # Example
373    /// ```
374    /// # use base_d::DictionaryRegistry;
375    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
376    /// let registry = DictionaryRegistry::load_default()?;
377    /// let (name, dict) = registry.random()?;
378    /// let encoded = base_d::encode(b"Hello", &dict);
379    /// # Ok(())
380    /// # }
381    /// ```
382    pub fn random(&self) -> Result<(String, crate::Dictionary), Box<dyn std::error::Error>> {
383        use rand::seq::IteratorRandom;
384
385        let common_names: Vec<&String> = self
386            .dictionaries
387            .iter()
388            .filter(|(_, config)| {
389                // Only include common, character-based dictionaries
390                config.common && config.dictionary_type == DictionaryType::Char
391            })
392            .map(|(name, _)| name)
393            .collect();
394
395        let name = common_names
396            .into_iter()
397            .choose(&mut rand::rng())
398            .ok_or("No common dictionaries available")?;
399
400        let dict = self.dictionary(name)?;
401        Ok((name.clone(), dict))
402    }
403
404    /// Returns a list of all dictionary names.
405    pub fn names(&self) -> Vec<&str> {
406        self.dictionaries.keys().map(|s| s.as_str()).collect()
407    }
408
409    /// Returns a list of common dictionary names (suitable for random selection).
410    pub fn common_names(&self) -> Vec<&str> {
411        self.dictionaries
412            .iter()
413            .filter(|(_, config)| config.common)
414            .map(|(name, _)| name.as_str())
415            .collect()
416    }
417
418    /// Internal helper to build a Dictionary from a DictionaryConfig.
419    fn build_dictionary(&self, config: &DictionaryConfig) -> Result<crate::Dictionary, String> {
420        use crate::core::config::EncodingMode;
421
422        let mode = config.effective_mode();
423
424        // ByteRange mode uses start_codepoint, not chars
425        if mode == EncodingMode::ByteRange {
426            let start = config
427                .start_codepoint
428                .ok_or("ByteRange mode requires start_codepoint")?;
429            return crate::Dictionary::builder()
430                .mode(mode)
431                .start_codepoint(start)
432                .build();
433        }
434
435        // Get effective chars (handles both explicit and range-based)
436        let chars_str = config.effective_chars()?;
437        let chars: Vec<char> = chars_str.chars().collect();
438
439        // Build with optional padding
440        let mut builder = crate::Dictionary::builder().chars(chars).mode(mode);
441
442        if let Some(pad_str) = &config.padding
443            && let Some(pad_char) = pad_str.chars().next()
444        {
445            builder = builder.padding(pad_char);
446        }
447
448        builder.build()
449    }
450
451    /// Builds a WordDictionary from a named configuration.
452    ///
453    /// # Errors
454    ///
455    /// Returns error if:
456    /// - Dictionary not found
457    /// - Dictionary is not word-type
458    /// - Word list file cannot be read
459    /// - Word dictionary building fails
460    ///
461    /// # Example
462    /// ```
463    /// # use base_d::DictionaryRegistry;
464    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
465    /// let registry = DictionaryRegistry::load_default()?;
466    /// // Would work if bip39 is defined as a word dictionary
467    /// // let dict = registry.word_dictionary("bip39")?;
468    /// # Ok(())
469    /// # }
470    /// ```
471    pub fn word_dictionary(
472        &self,
473        name: &str,
474    ) -> Result<crate::WordDictionary, crate::encoders::algorithms::errors::DictionaryNotFoundError>
475    {
476        let config = self.get_dictionary(name).ok_or_else(|| {
477            crate::encoders::algorithms::errors::DictionaryNotFoundError::new(name)
478        })?;
479
480        // Verify it's a word dictionary
481        if config.dictionary_type != DictionaryType::Word {
482            return Err(
483                crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(
484                    name,
485                    format!(
486                        "Dictionary '{}' is not a word dictionary (type is {:?})",
487                        name, config.dictionary_type
488                    ),
489                ),
490            );
491        }
492
493        self.build_word_dictionary(config).map_err(|e| {
494            crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(name, e)
495        })
496    }
497
498    /// Internal helper to build a WordDictionary from a DictionaryConfig.
499    fn build_word_dictionary(
500        &self,
501        config: &DictionaryConfig,
502    ) -> Result<crate::WordDictionary, String> {
503        let mut builder = crate::WordDictionary::builder();
504
505        // Get words from inline list, file, or builtin
506        if let Some(ref words) = config.words {
507            builder = builder.words(words.clone());
508        } else if let Some(ref words_file) = config.words_file {
509            // Check for builtin word lists
510            let content = match words_file.as_str() {
511                "builtin:bip39" | "builtin:bip39-english" => {
512                    crate::wordlists::BIP39_ENGLISH.to_string()
513                }
514                "builtin:eff_long" | "builtin:eff-long" => crate::wordlists::EFF_LONG.to_string(),
515                "builtin:eff_short1" | "builtin:eff-short1" => {
516                    crate::wordlists::EFF_SHORT1.to_string()
517                }
518                "builtin:eff_short2" | "builtin:eff-short2" => {
519                    crate::wordlists::EFF_SHORT2.to_string()
520                }
521                "builtin:diceware" => crate::wordlists::DICEWARE.to_string(),
522                "builtin:pgp_even" | "builtin:pgp-even" => crate::wordlists::PGP_EVEN.to_string(),
523                "builtin:pgp_odd" | "builtin:pgp-odd" => crate::wordlists::PGP_ODD.to_string(),
524                _ => {
525                    // Resolve path (support ~ expansion)
526                    let expanded = shellexpand::tilde(words_file);
527                    std::fs::read_to_string(expanded.as_ref())
528                        .map_err(|e| format!("Failed to read words file '{}': {}", words_file, e))?
529                }
530            };
531            builder = builder.words_from_str(&content);
532        } else {
533            return Err("Word dictionary must have 'words' or 'words_file'".to_string());
534        }
535
536        // Set optional delimiter
537        if let Some(ref delimiter) = config.delimiter {
538            builder = builder.delimiter(delimiter.clone());
539        }
540
541        // Set case sensitivity
542        if let Some(case_sensitive) = config.case_sensitive {
543            builder = builder.case_sensitive(case_sensitive);
544        }
545
546        builder.build()
547    }
548
549    /// Returns the dictionary type for a named dictionary.
550    ///
551    /// Returns `None` if the dictionary is not found.
552    pub fn dictionary_type(&self, name: &str) -> Option<DictionaryType> {
553        self.get_dictionary(name).map(|c| c.dictionary_type.clone())
554    }
555
556    /// Checks if a dictionary is word-based.
557    pub fn is_word_dictionary(&self, name: &str) -> bool {
558        self.dictionary_type(name) == Some(DictionaryType::Word)
559    }
560}
561
#[cfg(test)]
mod tests {
    use super::*;

    /// The bundled configuration parses and contains the "cards" dictionary.
    #[test]
    fn test_load_default_config() {
        let registry = DictionaryRegistry::load_default().unwrap();
        assert!(registry.dictionaries.contains_key("cards"));
    }

    /// The bundled "cards" dictionary lists exactly 52 characters.
    #[test]
    fn test_cards_dictionary_length() {
        let registry = DictionaryRegistry::load_default().unwrap();
        let deck = registry.get_dictionary("cards").unwrap();
        let symbol_count = deck.chars.chars().count();
        assert_eq!(symbol_count, 52);
    }

    /// The bundled "base64" dictionary resolves to Chunked mode with "=" padding.
    #[test]
    fn test_base64_chunked_mode() {
        let registry = DictionaryRegistry::load_default().unwrap();
        let entry = registry.get_dictionary("base64").unwrap();
        assert_eq!(entry.effective_mode(), EncodingMode::Chunked);
        assert_eq!(entry.padding, Some("=".to_string()));
    }

    /// The bundled "base64_radix" dictionary resolves to Radix mode.
    #[test]
    fn test_base64_radix_mode() {
        let registry = DictionaryRegistry::load_default().unwrap();
        let entry = registry.get_dictionary("base64_radix").unwrap();
        assert_eq!(entry.effective_mode(), EncodingMode::Radix);
    }

    /// Auto-detection picks Chunked for power-of-two alphabets, Radix otherwise.
    #[test]
    fn test_auto_detection_power_of_two() {
        // 4 = 2^2 → Chunked
        let four_symbols = DictionaryConfig {
            chars: "ABCD".to_string(),
            ..Default::default()
        };
        assert_eq!(four_symbols.effective_mode(), EncodingMode::Chunked);

        // 3 ≠ 2^n → Radix
        let three_symbols = DictionaryConfig {
            chars: "ABC".to_string(),
            ..Default::default()
        };
        assert_eq!(three_symbols.effective_mode(), EncodingMode::Radix);
    }

    /// An explicitly configured mode beats auto-detection.
    #[test]
    fn test_explicit_mode_override() {
        let forced = DictionaryConfig {
            chars: "ABCD".to_string(),       // auto-detection alone would say Chunked
            mode: Some(EncodingMode::Radix), // explicit request wins
            ..Default::default()
        };
        assert_eq!(forced.effective_mode(), EncodingMode::Radix);
    }

    /// Merging adds new dictionaries and replaces same-named ones.
    #[test]
    fn test_merge_configs() {
        let empty_registry = || DictionaryRegistry {
            dictionaries: HashMap::new(),
            compression: HashMap::new(),
            settings: Settings::default(),
        };
        let radix_entry = |chars: &str| DictionaryConfig {
            chars: chars.to_string(),
            mode: Some(EncodingMode::Radix),
            ..Default::default()
        };

        let mut base = empty_registry();
        base.dictionaries
            .insert("test1".to_string(), radix_entry("ABC"));

        let mut overrides = empty_registry();
        overrides
            .dictionaries
            .insert("test2".to_string(), radix_entry("XYZ"));
        overrides
            .dictionaries
            .insert("test1".to_string(), radix_entry("DEF"));

        base.merge(overrides);

        assert_eq!(base.dictionaries.len(), 2);
        assert_eq!(base.get_dictionary("test1").unwrap().chars, "DEF");
        assert_eq!(base.get_dictionary("test2").unwrap().chars, "XYZ");
    }

    /// A registry can be parsed straight from a TOML string.
    #[test]
    fn test_load_from_toml_string() {
        let toml_content = r#"
[dictionaries.custom]
chars = "0123456789"
mode = "base_conversion"
"#;
        let registry = DictionaryRegistry::from_toml(toml_content).unwrap();
        assert!(registry.dictionaries.contains_key("custom"));
        let custom = registry.get_dictionary("custom").unwrap();
        assert_eq!(custom.chars, "0123456789");
    }

    /// Explicit `chars` are returned unchanged.
    #[test]
    fn test_effective_chars_from_explicit() {
        let explicit = DictionaryConfig {
            chars: "ABCD".to_string(),
            ..Default::default()
        };
        assert_eq!(explicit.effective_chars().unwrap(), "ABCD");
    }

    /// `start` + `length` expand into a sequential range.
    #[test]
    fn test_effective_chars_from_range() {
        let ranged = DictionaryConfig {
            start: Some("A".to_string()),
            length: Some(4),
            ..Default::default()
        };
        assert_eq!(ranged.effective_chars().unwrap(), "ABCD");
    }

    /// Explicit `chars` win over a `start` + `length` definition.
    #[test]
    fn test_effective_chars_explicit_takes_priority() {
        let both = DictionaryConfig {
            chars: "XYZ".to_string(),
            start: Some("A".to_string()),
            length: Some(4),
            ..Default::default()
        };
        assert_eq!(both.effective_chars().unwrap(), "XYZ");
    }

    /// Ranges may start at a non-ASCII character.
    #[test]
    fn test_effective_chars_unicode_range() {
        let hangul = DictionaryConfig {
            start: Some("가".to_string()), // Korean Hangul U+AC00
            length: Some(4),
            ..Default::default()
        };
        let generated = hangul.effective_chars().unwrap();
        assert_eq!(generated.chars().count(), 4);
        assert_eq!(generated, "가각갂갃");
    }

    /// A range running into the surrogate gap is rejected.
    #[test]
    fn test_effective_chars_surrogate_gap_error() {
        let crossing = DictionaryConfig {
            start: Some("\u{D700}".to_string()), // shortly before the surrogates
            length: Some(512),                   // long enough to reach U+D800
            ..Default::default()
        };
        assert!(crossing.effective_chars().is_err());
    }

    /// A range running past U+10FFFF is rejected.
    #[test]
    fn test_effective_chars_exceeds_unicode_max() {
        let overflowing = DictionaryConfig {
            start: Some("\u{10FFFE}".to_string()), // near the end of Unicode
            length: Some(10),                      // would pass U+10FFFF
            ..Default::default()
        };
        assert!(overflowing.effective_chars().is_err());
    }

    /// With no explicit `chars`, auto-detection relies on the `length` field.
    #[test]
    fn test_effective_mode_with_length_field() {
        // 64 = 2^6 → Chunked
        let power_of_two = DictionaryConfig {
            start: Some("A".to_string()),
            length: Some(64),
            ..Default::default()
        };
        assert_eq!(power_of_two.effective_mode(), EncodingMode::Chunked);

        // 52 ≠ 2^n → Radix
        let not_power_of_two = DictionaryConfig {
            start: Some("A".to_string()),
            length: Some(52),
            ..Default::default()
        };
        assert_eq!(not_power_of_two.effective_mode(), EncodingMode::Radix);
    }
}