1use serde::Deserialize;
2use std::collections::HashMap;
3
4#[derive(Debug, Clone, Deserialize, PartialEq, Eq, Default)]
6#[serde(rename_all = "snake_case")]
7pub enum DictionaryType {
8 #[default]
10 Char,
11 Word,
13}
14
15#[derive(Debug, Clone, Deserialize, PartialEq, Eq)]
20#[serde(rename_all = "snake_case")]
21#[derive(Default)]
22pub enum EncodingMode {
23 #[default]
27 #[serde(alias = "base_conversion")]
28 Radix,
29 Chunked,
32 ByteRange,
35}
36
37#[derive(Debug, Deserialize, Clone)]
39pub struct DictionaryConfig {
40 #[serde(default, rename = "type")]
43 pub dictionary_type: DictionaryType,
44
45 #[serde(default)]
48 pub chars: String,
49 #[serde(default)]
52 pub start: Option<String>,
53 #[serde(default)]
56 pub length: Option<usize>,
57 #[serde(default)]
59 pub start_codepoint: Option<u32>,
60
61 #[serde(default)]
64 pub words: Option<Vec<String>>,
65 #[serde(default)]
67 pub words_file: Option<String>,
68 #[serde(default)]
70 pub delimiter: Option<String>,
71 #[serde(default)]
73 pub case_sensitive: Option<bool>,
74
75 #[serde(default)]
78 pub mode: Option<EncodingMode>,
79 #[serde(default)]
81 pub padding: Option<String>,
82 #[serde(default = "default_true")]
85 pub common: bool,
86}
87
88impl Default for DictionaryConfig {
89 fn default() -> Self {
90 Self {
91 dictionary_type: DictionaryType::default(),
92 chars: String::new(),
93 start: None,
94 length: None,
95 start_codepoint: None,
96 words: None,
97 words_file: None,
98 delimiter: None,
99 case_sensitive: None,
100 mode: None,
101 padding: None,
102 common: true, }
104 }
105}
106
107impl DictionaryConfig {
108 pub fn effective_chars(&self) -> Result<String, String> {
115 if !self.chars.is_empty() {
117 return Ok(self.chars.clone());
118 }
119
120 if let (Some(start_str), Some(length)) = (&self.start, self.length) {
122 let start_char = start_str
123 .chars()
124 .next()
125 .ok_or("start must contain at least one character")?;
126 let start_codepoint = start_char as u32;
127
128 return Self::generate_range(start_codepoint, length);
129 }
130
131 Ok(String::new())
133 }
134
135 fn generate_range(start: u32, length: usize) -> Result<String, String> {
137 const MAX_UNICODE: u32 = 0x10FFFF;
138 const SURROGATE_START: u32 = 0xD800;
139 const SURROGATE_END: u32 = 0xDFFF;
140
141 if length == 0 {
142 return Err("length must be greater than 0".to_string());
143 }
144
145 let end = start
146 .checked_add(length as u32 - 1)
147 .ok_or("range exceeds maximum Unicode codepoint")?;
148
149 if end > MAX_UNICODE {
150 return Err(format!(
151 "range end U+{:X} exceeds maximum Unicode codepoint U+{:X}",
152 end, MAX_UNICODE
153 ));
154 }
155
156 let crosses_surrogates = start <= SURROGATE_END && end >= SURROGATE_START;
158 if crosses_surrogates {
159 return Err(format!(
160 "range U+{:X}..U+{:X} crosses surrogate gap (U+D800..U+DFFF)",
161 start, end
162 ));
163 }
164
165 let mut result = String::with_capacity(length * 4); for i in 0..length {
167 let codepoint = start + i as u32;
168 match char::from_u32(codepoint) {
169 Some(c) => result.push(c),
170 None => return Err(format!("invalid codepoint U+{:X}", codepoint)),
171 }
172 }
173
174 Ok(result)
175 }
176
177 pub fn effective_mode(&self) -> EncodingMode {
184 if let Some(mode) = &self.mode {
185 return mode.clone();
186 }
187
188 let len = if self.start_codepoint.is_some() {
190 return EncodingMode::ByteRange;
193 } else if let Some(length) = self.length {
194 length
196 } else {
197 self.chars.chars().count()
198 };
199
200 if len > 0 && len.is_power_of_two() {
201 EncodingMode::Chunked
202 } else {
203 EncodingMode::Radix
204 }
205 }
206}
207
/// Serde default provider for boolean fields that should be `true` when the
/// key is absent (referenced via `#[serde(default = "default_true")]`).
fn default_true() -> bool {
    true
}
211
212#[derive(Debug, Deserialize)]
214pub struct DictionaryRegistry {
215 pub dictionaries: HashMap<String, DictionaryConfig>,
217 #[serde(default)]
219 pub compression: HashMap<String, CompressionConfig>,
220 #[serde(default)]
222 pub settings: Settings,
223}
224
225#[derive(Debug, Deserialize, Clone)]
227pub struct CompressionConfig {
228 pub default_level: u32,
230}
231
232#[derive(Debug, Deserialize, Clone, Default)]
234pub struct XxHashSettings {
235 #[serde(default)]
237 pub default_seed: u64,
238 #[serde(default)]
240 pub default_secret_file: Option<String>,
241}
242
243#[derive(Debug, Deserialize, Clone, Default)]
245pub struct Settings {
246 #[serde(default)]
248 pub default_dictionary: Option<String>,
249 #[serde(default)]
251 pub xxhash: XxHashSettings,
252}
253
254impl DictionaryRegistry {
255 pub fn from_toml(content: &str) -> Result<Self, toml::de::Error> {
257 toml::from_str(content)
258 }
259
260 pub fn load_default() -> Result<Self, Box<dyn std::error::Error>> {
264 let content = include_str!("../../dictionaries.toml");
265 Ok(Self::from_toml(content)?)
266 }
267
268 pub fn load_from_file(path: &std::path::Path) -> Result<Self, Box<dyn std::error::Error>> {
270 let content = std::fs::read_to_string(path)?;
271 Ok(Self::from_toml(&content)?)
272 }
273
274 pub fn load_with_overrides() -> Result<Self, Box<dyn std::error::Error>> {
283 let mut config = Self::load_default()?;
284
285 if let Some(config_dir) = dirs::config_dir() {
287 let user_config_path = config_dir.join("base-d").join("dictionaries.toml");
288 if user_config_path.exists() {
289 match Self::load_from_file(&user_config_path) {
290 Ok(user_config) => {
291 config.merge(user_config);
292 }
293 Err(e) => {
294 eprintln!(
295 "Warning: Failed to load user config from {:?}: {}",
296 user_config_path, e
297 );
298 }
299 }
300 }
301 }
302
303 let local_config_path = std::path::Path::new("dictionaries.toml");
305 if local_config_path.exists() {
306 match Self::load_from_file(local_config_path) {
307 Ok(local_config) => {
308 config.merge(local_config);
309 }
310 Err(e) => {
311 eprintln!(
312 "Warning: Failed to load local config from {:?}: {}",
313 local_config_path, e
314 );
315 }
316 }
317 }
318
319 Ok(config)
320 }
321
322 pub fn merge(&mut self, other: DictionaryRegistry) {
326 for (name, dictionary) in other.dictionaries {
327 self.dictionaries.insert(name, dictionary);
328 }
329 }
330
331 pub fn get_dictionary(&self, name: &str) -> Option<&DictionaryConfig> {
333 self.dictionaries.get(name)
334 }
335
336 pub fn dictionary(
354 &self,
355 name: &str,
356 ) -> Result<crate::Dictionary, crate::encoders::algorithms::errors::DictionaryNotFoundError>
357 {
358 let config = self.get_dictionary(name).ok_or_else(|| {
359 crate::encoders::algorithms::errors::DictionaryNotFoundError::new(name)
360 })?;
361
362 self.build_dictionary(config).map_err(|e| {
363 crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(name, e)
364 })
365 }
366
367 pub fn random(&self) -> Result<(String, crate::Dictionary), Box<dyn std::error::Error>> {
383 use rand::seq::IteratorRandom;
384
385 let common_names: Vec<&String> = self
386 .dictionaries
387 .iter()
388 .filter(|(_, config)| {
389 config.common && config.dictionary_type == DictionaryType::Char
391 })
392 .map(|(name, _)| name)
393 .collect();
394
395 let name = common_names
396 .into_iter()
397 .choose(&mut rand::rng())
398 .ok_or("No common dictionaries available")?;
399
400 let dict = self.dictionary(name)?;
401 Ok((name.clone(), dict))
402 }
403
404 pub fn names(&self) -> Vec<&str> {
406 self.dictionaries.keys().map(|s| s.as_str()).collect()
407 }
408
409 pub fn common_names(&self) -> Vec<&str> {
411 self.dictionaries
412 .iter()
413 .filter(|(_, config)| config.common)
414 .map(|(name, _)| name.as_str())
415 .collect()
416 }
417
418 fn build_dictionary(&self, config: &DictionaryConfig) -> Result<crate::Dictionary, String> {
420 use crate::core::config::EncodingMode;
421
422 let mode = config.effective_mode();
423
424 if mode == EncodingMode::ByteRange {
426 let start = config
427 .start_codepoint
428 .ok_or("ByteRange mode requires start_codepoint")?;
429 return crate::Dictionary::builder()
430 .mode(mode)
431 .start_codepoint(start)
432 .build();
433 }
434
435 let chars_str = config.effective_chars()?;
437 let chars: Vec<char> = chars_str.chars().collect();
438
439 let mut builder = crate::Dictionary::builder().chars(chars).mode(mode);
441
442 if let Some(pad_str) = &config.padding
443 && let Some(pad_char) = pad_str.chars().next()
444 {
445 builder = builder.padding(pad_char);
446 }
447
448 builder.build()
449 }
450
451 pub fn word_dictionary(
472 &self,
473 name: &str,
474 ) -> Result<crate::WordDictionary, crate::encoders::algorithms::errors::DictionaryNotFoundError>
475 {
476 let config = self.get_dictionary(name).ok_or_else(|| {
477 crate::encoders::algorithms::errors::DictionaryNotFoundError::new(name)
478 })?;
479
480 if config.dictionary_type != DictionaryType::Word {
482 return Err(
483 crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(
484 name,
485 format!(
486 "Dictionary '{}' is not a word dictionary (type is {:?})",
487 name, config.dictionary_type
488 ),
489 ),
490 );
491 }
492
493 self.build_word_dictionary(config).map_err(|e| {
494 crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(name, e)
495 })
496 }
497
498 fn build_word_dictionary(
500 &self,
501 config: &DictionaryConfig,
502 ) -> Result<crate::WordDictionary, String> {
503 let mut builder = crate::WordDictionary::builder();
504
505 if let Some(ref words) = config.words {
507 builder = builder.words(words.clone());
508 } else if let Some(ref words_file) = config.words_file {
509 let content = match words_file.as_str() {
511 "builtin:bip39" | "builtin:bip39-english" => {
512 crate::wordlists::BIP39_ENGLISH.to_string()
513 }
514 "builtin:eff_long" | "builtin:eff-long" => crate::wordlists::EFF_LONG.to_string(),
515 "builtin:eff_short1" | "builtin:eff-short1" => {
516 crate::wordlists::EFF_SHORT1.to_string()
517 }
518 "builtin:eff_short2" | "builtin:eff-short2" => {
519 crate::wordlists::EFF_SHORT2.to_string()
520 }
521 "builtin:diceware" => crate::wordlists::DICEWARE.to_string(),
522 "builtin:pgp_even" | "builtin:pgp-even" => crate::wordlists::PGP_EVEN.to_string(),
523 "builtin:pgp_odd" | "builtin:pgp-odd" => crate::wordlists::PGP_ODD.to_string(),
524 _ => {
525 let expanded = shellexpand::tilde(words_file);
527 std::fs::read_to_string(expanded.as_ref())
528 .map_err(|e| format!("Failed to read words file '{}': {}", words_file, e))?
529 }
530 };
531 builder = builder.words_from_str(&content);
532 } else {
533 return Err("Word dictionary must have 'words' or 'words_file'".to_string());
534 }
535
536 if let Some(ref delimiter) = config.delimiter {
538 builder = builder.delimiter(delimiter.clone());
539 }
540
541 if let Some(case_sensitive) = config.case_sensitive {
543 builder = builder.case_sensitive(case_sensitive);
544 }
545
546 builder.build()
547 }
548
549 pub fn dictionary_type(&self, name: &str) -> Option<DictionaryType> {
553 self.get_dictionary(name).map(|c| c.dictionary_type.clone())
554 }
555
556 pub fn is_word_dictionary(&self, name: &str) -> bool {
558 self.dictionary_type(name) == Some(DictionaryType::Word)
559 }
560}
561
#[cfg(test)]
mod tests {
    use super::*;

    /// Registry parsed from the embedded defaults.
    fn defaults() -> DictionaryRegistry {
        DictionaryRegistry::load_default().unwrap()
    }

    /// Config with only an explicit character list.
    fn chars_config(chars: &str) -> DictionaryConfig {
        DictionaryConfig {
            chars: chars.to_string(),
            ..Default::default()
        }
    }

    /// Config with an explicit character list and an explicit `Radix` mode.
    fn radix_config(chars: &str) -> DictionaryConfig {
        DictionaryConfig {
            mode: Some(EncodingMode::Radix),
            ..chars_config(chars)
        }
    }

    /// Config describing a generated range (`start` + `length`).
    fn range_config(start: &str, length: usize) -> DictionaryConfig {
        DictionaryConfig {
            start: Some(start.to_string()),
            length: Some(length),
            ..Default::default()
        }
    }

    /// Registry holding one `Radix` dictionary per `(name, chars)` pair.
    fn registry_of(entries: &[(&str, &str)]) -> DictionaryRegistry {
        let mut registry = DictionaryRegistry {
            dictionaries: HashMap::new(),
            compression: HashMap::new(),
            settings: Settings::default(),
        };
        for (name, chars) in entries {
            registry
                .dictionaries
                .insert(name.to_string(), radix_config(chars));
        }
        registry
    }

    #[test]
    fn test_load_default_config() {
        assert!(defaults().dictionaries.contains_key("cards"));
    }

    #[test]
    fn test_cards_dictionary_length() {
        let registry = defaults();
        let cards = registry.get_dictionary("cards").unwrap();
        assert_eq!(cards.chars.chars().count(), 52);
    }

    #[test]
    fn test_base64_chunked_mode() {
        let registry = defaults();
        let base64 = registry.get_dictionary("base64").unwrap();
        assert_eq!(base64.effective_mode(), EncodingMode::Chunked);
        assert_eq!(base64.padding, Some("=".to_string()));
    }

    #[test]
    fn test_base64_radix_mode() {
        let registry = defaults();
        let base64_radix = registry.get_dictionary("base64_radix").unwrap();
        assert_eq!(base64_radix.effective_mode(), EncodingMode::Radix);
    }

    #[test]
    fn test_auto_detection_power_of_two() {
        // Four symbols: power of two.
        assert_eq!(chars_config("ABCD").effective_mode(), EncodingMode::Chunked);
        // Three symbols: not a power of two.
        assert_eq!(chars_config("ABC").effective_mode(), EncodingMode::Radix);
    }

    #[test]
    fn test_explicit_mode_override() {
        // Would auto-detect as Chunked, but the explicit mode wins.
        assert_eq!(radix_config("ABCD").effective_mode(), EncodingMode::Radix);
    }

    #[test]
    fn test_merge_configs() {
        let mut base = registry_of(&[("test1", "ABC")]);
        let overrides = registry_of(&[("test2", "XYZ"), ("test1", "DEF")]);

        base.merge(overrides);

        assert_eq!(base.dictionaries.len(), 2);
        assert_eq!(base.get_dictionary("test1").unwrap().chars, "DEF");
        assert_eq!(base.get_dictionary("test2").unwrap().chars, "XYZ");
    }

    #[test]
    fn test_load_from_toml_string() {
        let toml_content = r#"
[dictionaries.custom]
chars = "0123456789"
mode = "base_conversion"
"#;
        let registry = DictionaryRegistry::from_toml(toml_content).unwrap();
        assert!(registry.dictionaries.contains_key("custom"));
        assert_eq!(
            registry.get_dictionary("custom").unwrap().chars,
            "0123456789"
        );
    }

    #[test]
    fn test_effective_chars_from_explicit() {
        assert_eq!(chars_config("ABCD").effective_chars().unwrap(), "ABCD");
    }

    #[test]
    fn test_effective_chars_from_range() {
        assert_eq!(range_config("A", 4).effective_chars().unwrap(), "ABCD");
    }

    #[test]
    fn test_effective_chars_explicit_takes_priority() {
        let config = DictionaryConfig {
            chars: "XYZ".to_string(),
            ..range_config("A", 4)
        };
        assert_eq!(config.effective_chars().unwrap(), "XYZ");
    }

    #[test]
    fn test_effective_chars_unicode_range() {
        let generated = range_config("가", 4).effective_chars().unwrap();
        assert_eq!(generated.chars().count(), 4);
        assert_eq!(generated, "가각갂갃");
    }

    #[test]
    fn test_effective_chars_surrogate_gap_error() {
        // U+D700 + 511 = U+D8FF, which lies inside the surrogate gap.
        assert!(range_config("\u{D700}", 512).effective_chars().is_err());
    }

    #[test]
    fn test_effective_chars_exceeds_unicode_max() {
        // U+10FFFE + 9 is past U+10FFFF.
        assert!(range_config("\u{10FFFE}", 10).effective_chars().is_err());
    }

    #[test]
    fn test_effective_mode_with_length_field() {
        assert_eq!(
            range_config("A", 64).effective_mode(),
            EncodingMode::Chunked
        );
        assert_eq!(range_config("A", 52).effective_mode(), EncodingMode::Radix);
    }
}