1use serde::Deserialize;
2use std::collections::HashMap;
3
/// Kind of dictionary described by a [`DictionaryConfig`].
///
/// Deserialized from the snake_case variant name (`"char"` or `"word"`).
/// `Char` is the value used when the `type` key is absent.
#[derive(Debug, Clone, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum DictionaryType {
    /// Character-based dictionary (the default).
    #[default]
    Char,
    /// Word-based dictionary; required by `DictionaryRegistry::word_dictionary`.
    Word,
}
14
15#[derive(Debug, Clone, Deserialize, PartialEq, Eq)]
20#[serde(rename_all = "snake_case")]
21#[derive(Default)]
22pub enum EncodingMode {
23 #[default]
27 #[serde(alias = "base_conversion")]
28 Radix,
29 Chunked,
32 ByteRange,
35}
36
/// Configuration of a single dictionary entry as read from TOML.
///
/// Every key is optional; missing keys fall back to the defaults noted on
/// each field.
#[derive(Debug, Deserialize, Clone)]
pub struct DictionaryConfig {
    /// Dictionary kind, read from the TOML key `type`. Defaults to `Char`.
    #[serde(default, rename = "type")]
    pub dictionary_type: DictionaryType,

    /// Explicit alphabet. When non-empty, `effective_chars` returns it verbatim.
    #[serde(default)]
    pub chars: String,
    /// Range start: the first character of this string is the first
    /// codepoint of a generated alphabet (used together with `length`).
    #[serde(default)]
    pub start: Option<String>,
    /// Number of consecutive codepoints in a generated alphabet; also
    /// consulted by `effective_mode` during mode auto-detection.
    #[serde(default)]
    pub length: Option<usize>,
    /// Starting codepoint for `ByteRange` dictionaries. Its presence makes
    /// `effective_mode` pick `ByteRange` unless `mode` is set explicitly.
    #[serde(default)]
    pub start_codepoint: Option<u32>,

    /// Inline word list for word dictionaries; takes precedence over
    /// `words_file`.
    #[serde(default)]
    pub words: Option<Vec<String>>,
    /// Word list source: either a `builtin:` name or a file path (a leading
    /// `~` is expanded).
    #[serde(default)]
    pub words_file: Option<String>,
    /// Delimiter forwarded to the word dictionary builder when set.
    #[serde(default)]
    pub delimiter: Option<String>,
    /// Case sensitivity forwarded to the word dictionary builder when set.
    #[serde(default)]
    pub case_sensitive: Option<bool>,

    /// Explicit encoding mode; overrides auto-detection in `effective_mode`.
    #[serde(default)]
    pub mode: Option<EncodingMode>,
    /// Padding specification; only the first character of the string is used.
    #[serde(default)]
    pub padding: Option<String>,
    /// Whether the dictionary is listed by `common_names` and eligible for
    /// `random`. Defaults to `true`.
    #[serde(default = "default_true")]
    pub common: bool,
}
87
88impl Default for DictionaryConfig {
89 fn default() -> Self {
90 Self {
91 dictionary_type: DictionaryType::default(),
92 chars: String::new(),
93 start: None,
94 length: None,
95 start_codepoint: None,
96 words: None,
97 words_file: None,
98 delimiter: None,
99 case_sensitive: None,
100 mode: None,
101 padding: None,
102 common: true, }
104 }
105}
106
107impl DictionaryConfig {
108 pub fn effective_chars(&self) -> Result<String, String> {
115 if !self.chars.is_empty() {
117 return Ok(self.chars.clone());
118 }
119
120 if let (Some(start_str), Some(length)) = (&self.start, self.length) {
122 let start_char = start_str
123 .chars()
124 .next()
125 .ok_or("start must contain at least one character")?;
126 let start_codepoint = start_char as u32;
127
128 return Self::generate_range(start_codepoint, length);
129 }
130
131 Ok(String::new())
133 }
134
135 fn generate_range(start: u32, length: usize) -> Result<String, String> {
137 const MAX_UNICODE: u32 = 0x10FFFF;
138 const SURROGATE_START: u32 = 0xD800;
139 const SURROGATE_END: u32 = 0xDFFF;
140
141 if length == 0 {
142 return Err("length must be greater than 0".to_string());
143 }
144
145 let end = start
146 .checked_add(length as u32 - 1)
147 .ok_or("range exceeds maximum Unicode codepoint")?;
148
149 if end > MAX_UNICODE {
150 return Err(format!(
151 "range end U+{:X} exceeds maximum Unicode codepoint U+{:X}",
152 end, MAX_UNICODE
153 ));
154 }
155
156 let crosses_surrogates = start <= SURROGATE_END && end >= SURROGATE_START;
158 if crosses_surrogates {
159 return Err(format!(
160 "range U+{:X}..U+{:X} crosses surrogate gap (U+D800..U+DFFF)",
161 start, end
162 ));
163 }
164
165 let mut result = String::with_capacity(length * 4); for i in 0..length {
167 let codepoint = start + i as u32;
168 match char::from_u32(codepoint) {
169 Some(c) => result.push(c),
170 None => return Err(format!("invalid codepoint U+{:X}", codepoint)),
171 }
172 }
173
174 Ok(result)
175 }
176
177 pub fn effective_mode(&self) -> EncodingMode {
184 if let Some(mode) = &self.mode {
185 return mode.clone();
186 }
187
188 let len = if self.start_codepoint.is_some() {
190 return EncodingMode::ByteRange;
193 } else if let Some(length) = self.length {
194 length
196 } else {
197 self.chars.chars().count()
198 };
199
200 if len > 0 && len.is_power_of_two() {
201 EncodingMode::Chunked
202 } else {
203 EncodingMode::Radix
204 }
205 }
206}
207
/// Serde default provider returning `true`; referenced by
/// `#[serde(default = "default_true")]` on `DictionaryConfig::common`.
fn default_true() -> bool {
    true
}
211
/// Top-level configuration document: named dictionaries plus optional
/// compression and general settings.
#[derive(Debug, Deserialize)]
pub struct DictionaryRegistry {
    /// Dictionary configurations keyed by dictionary name. This key is
    /// required (it has no serde default).
    pub dictionaries: HashMap<String, DictionaryConfig>,
    /// Compression configurations keyed by name (presumably the algorithm
    /// name; confirm against callers). Empty when omitted.
    #[serde(default)]
    pub compression: HashMap<String, CompressionConfig>,
    /// General settings; `Settings::default()` when omitted.
    #[serde(default)]
    pub settings: Settings,
}
224
/// Configuration for one entry of the `compression` table.
#[derive(Debug, Deserialize, Clone)]
pub struct CompressionConfig {
    /// Required level value (presumably the compression level applied when
    /// none is requested; confirm against callers).
    pub default_level: u32,
}
231
232#[derive(Debug, Deserialize, Clone, Default)]
234pub struct XxHashSettings {
235 #[serde(default)]
237 pub default_seed: u64,
238 #[serde(default)]
240 pub default_secret_file: Option<String>,
241}
242
243#[derive(Debug, Deserialize, Clone, Default)]
245pub struct Settings {
246 #[serde(default)]
248 pub default_dictionary: Option<String>,
249 #[serde(default)]
251 pub xxhash: XxHashSettings,
252}
253
254impl DictionaryRegistry {
255 pub fn from_toml(content: &str) -> Result<Self, toml::de::Error> {
257 toml::from_str(content)
258 }
259
260 pub fn load_default() -> Result<Self, Box<dyn std::error::Error>> {
264 let content = include_str!("../../dictionaries.toml");
265 Ok(Self::from_toml(content)?)
266 }
267
268 pub fn load_from_file(path: &std::path::Path) -> Result<Self, Box<dyn std::error::Error>> {
270 let content = std::fs::read_to_string(path)?;
271 Ok(Self::from_toml(&content)?)
272 }
273
274 pub fn load_with_overrides() -> Result<Self, Box<dyn std::error::Error>> {
283 let mut config = Self::load_default()?;
284
285 if let Some(config_dir) = dirs::config_dir() {
287 let user_config_path = config_dir.join("base-d").join("dictionaries.toml");
288 if user_config_path.exists() {
289 match Self::load_from_file(&user_config_path) {
290 Ok(user_config) => {
291 config.merge(user_config);
292 }
293 Err(e) => {
294 eprintln!(
295 "Warning: Failed to load user config from {:?}: {}",
296 user_config_path, e
297 );
298 }
299 }
300 }
301 }
302
303 let local_config_path = std::path::Path::new("dictionaries.toml");
305 if local_config_path.exists() {
306 match Self::load_from_file(local_config_path) {
307 Ok(local_config) => {
308 config.merge(local_config);
309 }
310 Err(e) => {
311 eprintln!(
312 "Warning: Failed to load local config from {:?}: {}",
313 local_config_path, e
314 );
315 }
316 }
317 }
318
319 Ok(config)
320 }
321
322 pub fn merge(&mut self, other: DictionaryRegistry) {
326 for (name, dictionary) in other.dictionaries {
327 self.dictionaries.insert(name, dictionary);
328 }
329 }
330
    /// Looks up the raw configuration registered under `name`, or `None` when
    /// no such dictionary exists.
    pub fn get_dictionary(&self, name: &str) -> Option<&DictionaryConfig> {
        self.dictionaries.get(name)
    }
335
336 pub fn dictionary(
354 &self,
355 name: &str,
356 ) -> Result<crate::Dictionary, crate::encoders::algorithms::errors::DictionaryNotFoundError>
357 {
358 let config = self.get_dictionary(name).ok_or_else(|| {
359 crate::encoders::algorithms::errors::DictionaryNotFoundError::new(name)
360 })?;
361
362 self.build_dictionary(config).map_err(|e| {
363 crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(name, e)
364 })
365 }
366
367 pub fn random(&self) -> Result<(String, crate::Dictionary), Box<dyn std::error::Error>> {
383 use rand::seq::IteratorRandom;
384
385 let common_names: Vec<&String> = self
386 .dictionaries
387 .iter()
388 .filter(|(_, config)| {
389 config.common && config.dictionary_type == DictionaryType::Char
391 })
392 .map(|(name, _)| name)
393 .collect();
394
395 let name = common_names
396 .into_iter()
397 .choose(&mut rand::rng())
398 .ok_or("No common dictionaries available")?;
399
400 let dict = self.dictionary(name)?;
401 Ok((name.clone(), dict))
402 }
403
404 pub fn names(&self) -> Vec<&str> {
406 self.dictionaries.keys().map(|s| s.as_str()).collect()
407 }
408
409 pub fn common_names(&self) -> Vec<&str> {
411 self.dictionaries
412 .iter()
413 .filter(|(_, config)| config.common)
414 .map(|(name, _)| name.as_str())
415 .collect()
416 }
417
418 fn build_dictionary(&self, config: &DictionaryConfig) -> Result<crate::Dictionary, String> {
420 use crate::core::config::EncodingMode;
421
422 let mode = config.effective_mode();
423
424 if mode == EncodingMode::ByteRange {
426 let start = config
427 .start_codepoint
428 .ok_or("ByteRange mode requires start_codepoint")?;
429 return crate::Dictionary::builder()
430 .mode(mode)
431 .start_codepoint(start)
432 .build();
433 }
434
435 let chars_str = config.effective_chars()?;
437 let chars: Vec<char> = chars_str.chars().collect();
438
439 let mut builder = crate::Dictionary::builder().chars(chars).mode(mode);
441
442 if let Some(pad_str) = &config.padding
443 && let Some(pad_char) = pad_str.chars().next()
444 {
445 builder = builder.padding(pad_char);
446 }
447
448 builder.build()
449 }
450
451 pub fn word_dictionary(
472 &self,
473 name: &str,
474 ) -> Result<crate::WordDictionary, crate::encoders::algorithms::errors::DictionaryNotFoundError>
475 {
476 let config = self.get_dictionary(name).ok_or_else(|| {
477 crate::encoders::algorithms::errors::DictionaryNotFoundError::new(name)
478 })?;
479
480 if config.dictionary_type != DictionaryType::Word {
482 return Err(
483 crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(
484 name,
485 format!(
486 "Dictionary '{}' is not a word dictionary (type is {:?})",
487 name, config.dictionary_type
488 ),
489 ),
490 );
491 }
492
493 self.build_word_dictionary(config).map_err(|e| {
494 crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(name, e)
495 })
496 }
497
498 fn build_word_dictionary(
500 &self,
501 config: &DictionaryConfig,
502 ) -> Result<crate::WordDictionary, String> {
503 let mut builder = crate::WordDictionary::builder();
504
505 if let Some(ref words) = config.words {
507 builder = builder.words(words.clone());
508 } else if let Some(ref words_file) = config.words_file {
509 let content = match words_file.as_str() {
511 "builtin:bip39" | "builtin:bip39-english" => {
512 crate::wordlists::BIP39_ENGLISH.to_string()
513 }
514 "builtin:eff_long" | "builtin:eff-long" => crate::wordlists::EFF_LONG.to_string(),
515 "builtin:eff_short1" | "builtin:eff-short1" => {
516 crate::wordlists::EFF_SHORT1.to_string()
517 }
518 "builtin:eff_short2" | "builtin:eff-short2" => {
519 crate::wordlists::EFF_SHORT2.to_string()
520 }
521 "builtin:diceware" => crate::wordlists::DICEWARE.to_string(),
522 "builtin:pgp_even" | "builtin:pgp-even" => crate::wordlists::PGP_EVEN.to_string(),
523 "builtin:pgp_odd" | "builtin:pgp-odd" => crate::wordlists::PGP_ODD.to_string(),
524 "builtin:nato" => crate::wordlists::NATO.to_string(),
525 "builtin:buzzwords" => crate::wordlists::BUZZWORDS.to_string(),
526 "builtin:klingon" => crate::wordlists::KLINGON.to_string(),
527 "builtin:pokemon" => crate::wordlists::POKEMON.to_string(),
528 _ => {
529 let expanded = shellexpand::tilde(words_file);
531 std::fs::read_to_string(expanded.as_ref())
532 .map_err(|e| format!("Failed to read words file '{}': {}", words_file, e))?
533 }
534 };
535 builder = builder.words_from_str(&content);
536 } else {
537 return Err("Word dictionary must have 'words' or 'words_file'".to_string());
538 }
539
540 if let Some(ref delimiter) = config.delimiter {
542 builder = builder.delimiter(delimiter.clone());
543 }
544
545 if let Some(case_sensitive) = config.case_sensitive {
547 builder = builder.case_sensitive(case_sensitive);
548 }
549
550 builder.build()
551 }
552
553 pub fn dictionary_type(&self, name: &str) -> Option<DictionaryType> {
557 self.get_dictionary(name).map(|c| c.dictionary_type.clone())
558 }
559
560 pub fn is_word_dictionary(&self, name: &str) -> bool {
562 self.dictionary_type(name) == Some(DictionaryType::Word)
563 }
564}
565
#[cfg(test)]
mod tests {
    use super::*;

    /// Registry with no dictionaries, no compression entries and default settings.
    fn empty_registry() -> DictionaryRegistry {
        DictionaryRegistry {
            dictionaries: HashMap::new(),
            compression: HashMap::new(),
            settings: Settings::default(),
        }
    }

    /// Character dictionary config with an explicit alphabet and explicit Radix mode.
    fn radix_config(alphabet: &str) -> DictionaryConfig {
        DictionaryConfig {
            chars: alphabet.to_string(),
            mode: Some(EncodingMode::Radix),
            ..Default::default()
        }
    }

    /// Config describing a generated range of `length` codepoints from `start`.
    fn range_config(start: &str, length: usize) -> DictionaryConfig {
        DictionaryConfig {
            start: Some(start.to_string()),
            length: Some(length),
            ..Default::default()
        }
    }

    /// The embedded defaults include the `cards` dictionary.
    #[test]
    fn test_load_default_config() {
        let registry = DictionaryRegistry::load_default().unwrap();
        assert!(registry.dictionaries.contains_key("cards"));
    }

    /// The `cards` alphabet has one symbol per playing card.
    #[test]
    fn test_cards_dictionary_length() {
        let registry = DictionaryRegistry::load_default().unwrap();
        let cards = registry.get_dictionary("cards").unwrap();
        assert_eq!(cards.chars.chars().count(), 52);
    }

    /// `base64` resolves to Chunked mode and uses `=` padding.
    #[test]
    fn test_base64_chunked_mode() {
        let registry = DictionaryRegistry::load_default().unwrap();
        let base64 = registry.get_dictionary("base64").unwrap();
        assert_eq!(base64.effective_mode(), EncodingMode::Chunked);
        assert_eq!(base64.padding, Some("=".to_string()));
    }

    /// `base64_radix` resolves to Radix mode.
    #[test]
    fn test_base64_radix_mode() {
        let registry = DictionaryRegistry::load_default().unwrap();
        let base64_radix = registry.get_dictionary("base64_radix").unwrap();
        assert_eq!(base64_radix.effective_mode(), EncodingMode::Radix);
    }

    /// Without an explicit mode, power-of-two alphabets pick Chunked and
    /// other sizes pick Radix.
    #[test]
    fn test_auto_detection_power_of_two() {
        let four_symbols = DictionaryConfig {
            chars: "ABCD".to_string(),
            ..Default::default()
        };
        assert_eq!(four_symbols.effective_mode(), EncodingMode::Chunked);

        let three_symbols = DictionaryConfig {
            chars: "ABC".to_string(),
            ..Default::default()
        };
        assert_eq!(three_symbols.effective_mode(), EncodingMode::Radix);
    }

    /// An explicit mode wins over auto-detection.
    #[test]
    fn test_explicit_mode_override() {
        let forced = radix_config("ABCD");
        assert_eq!(forced.effective_mode(), EncodingMode::Radix);
    }

    /// Merging adds new entries and replaces entries with the same name.
    #[test]
    fn test_merge_configs() {
        let mut base = empty_registry();
        base.dictionaries
            .insert("test1".to_string(), radix_config("ABC"));

        let mut overrides = empty_registry();
        overrides
            .dictionaries
            .insert("test2".to_string(), radix_config("XYZ"));
        overrides
            .dictionaries
            .insert("test1".to_string(), radix_config("DEF"));

        base.merge(overrides);

        assert_eq!(base.dictionaries.len(), 2);
        assert_eq!(base.get_dictionary("test1").unwrap().chars, "DEF");
        assert_eq!(base.get_dictionary("test2").unwrap().chars, "XYZ");
    }

    /// A registry parses from TOML text, including the legacy mode alias.
    #[test]
    fn test_load_from_toml_string() {
        let toml_content = r#"
[dictionaries.custom]
chars = "0123456789"
mode = "base_conversion"
"#;
        let registry = DictionaryRegistry::from_toml(toml_content).unwrap();
        assert!(registry.dictionaries.contains_key("custom"));
        assert_eq!(
            registry.get_dictionary("custom").unwrap().chars,
            "0123456789"
        );
    }

    /// Explicit `chars` are returned unchanged.
    #[test]
    fn test_effective_chars_from_explicit() {
        let explicit = DictionaryConfig {
            chars: "ABCD".to_string(),
            ..Default::default()
        };
        assert_eq!(explicit.effective_chars().unwrap(), "ABCD");
    }

    /// `start` + `length` generate consecutive characters.
    #[test]
    fn test_effective_chars_from_range() {
        let ranged = range_config("A", 4);
        assert_eq!(ranged.effective_chars().unwrap(), "ABCD");
    }

    /// Explicit `chars` win over a range definition.
    #[test]
    fn test_effective_chars_explicit_takes_priority() {
        let both = DictionaryConfig {
            chars: "XYZ".to_string(),
            ..range_config("A", 4)
        };
        assert_eq!(both.effective_chars().unwrap(), "XYZ");
    }

    /// Ranges work outside ASCII.
    #[test]
    fn test_effective_chars_unicode_range() {
        let hangul = range_config("가", 4);
        let generated = hangul.effective_chars().unwrap();
        assert_eq!(generated.chars().count(), 4);
        assert_eq!(generated, "가각갂갃");
    }

    /// A range running into the surrogate gap is rejected.
    #[test]
    fn test_effective_chars_surrogate_gap_error() {
        let crossing = range_config("\u{D700}", 512);
        assert!(crossing.effective_chars().is_err());
    }

    /// A range running past U+10FFFF is rejected.
    #[test]
    fn test_effective_chars_exceeds_unicode_max() {
        let too_far = range_config("\u{10FFFE}", 10);
        assert!(too_far.effective_chars().is_err());
    }

    /// Mode auto-detection honours `length` for range-based configs.
    #[test]
    fn test_effective_mode_with_length_field() {
        let power_of_two = range_config("A", 64);
        assert_eq!(power_of_two.effective_mode(), EncodingMode::Chunked);

        let not_power_of_two = range_config("A", 52);
        assert_eq!(not_power_of_two.effective_mode(), EncodingMode::Radix);
    }
}