1use serde::Deserialize;
2use std::collections::HashMap;
3
4include!(concat!(env!("OUT_DIR"), "/registry.rs"));
6
7#[derive(Debug, Clone, Deserialize, PartialEq, Eq, Default)]
9#[serde(rename_all = "snake_case")]
10pub enum DictionaryType {
11 #[default]
13 Char,
14 Word,
16}
17
18#[derive(Debug, Clone, Deserialize, PartialEq, Eq)]
23#[serde(rename_all = "snake_case")]
24#[derive(Default)]
25pub enum EncodingMode {
26 #[default]
30 #[serde(alias = "base_conversion")]
31 Radix,
32 Chunked,
35 ByteRange,
38}
39
40#[derive(Debug, Deserialize, Clone)]
42pub struct DictionaryConfig {
43 #[serde(default, rename = "type")]
46 pub dictionary_type: DictionaryType,
47
48 #[serde(default)]
51 pub chars: String,
52 #[serde(default)]
55 pub start: Option<String>,
56 #[serde(default)]
59 pub length: Option<usize>,
60 #[serde(default)]
62 pub start_codepoint: Option<u32>,
63
64 #[serde(default)]
67 pub words: Option<Vec<String>>,
68 #[serde(default)]
70 pub words_file: Option<String>,
71 #[serde(default)]
73 pub delimiter: Option<String>,
74 #[serde(default)]
76 pub case_sensitive: Option<bool>,
77
78 #[serde(default)]
81 pub mode: Option<EncodingMode>,
82 #[serde(default)]
84 pub padding: Option<String>,
85 #[serde(default = "default_true")]
88 pub common: bool,
89}
90
91impl Default for DictionaryConfig {
92 fn default() -> Self {
93 Self {
94 dictionary_type: DictionaryType::default(),
95 chars: String::new(),
96 start: None,
97 length: None,
98 start_codepoint: None,
99 words: None,
100 words_file: None,
101 delimiter: None,
102 case_sensitive: None,
103 mode: None,
104 padding: None,
105 common: true, }
107 }
108}
109
110impl DictionaryConfig {
111 pub fn effective_chars(&self) -> Result<String, String> {
118 if !self.chars.is_empty() {
120 return Ok(self.chars.clone());
121 }
122
123 if let (Some(start_str), Some(length)) = (&self.start, self.length) {
125 let start_char = start_str
126 .chars()
127 .next()
128 .ok_or("start must contain at least one character")?;
129 let start_codepoint = start_char as u32;
130
131 return Self::generate_range(start_codepoint, length);
132 }
133
134 Ok(String::new())
136 }
137
138 fn generate_range(start: u32, length: usize) -> Result<String, String> {
140 const MAX_UNICODE: u32 = 0x10FFFF;
141 const SURROGATE_START: u32 = 0xD800;
142 const SURROGATE_END: u32 = 0xDFFF;
143
144 if length == 0 {
145 return Err("length must be greater than 0".to_string());
146 }
147
148 let end = start
149 .checked_add(length as u32 - 1)
150 .ok_or("range exceeds maximum Unicode codepoint")?;
151
152 if end > MAX_UNICODE {
153 return Err(format!(
154 "range end U+{:X} exceeds maximum Unicode codepoint U+{:X}",
155 end, MAX_UNICODE
156 ));
157 }
158
159 let crosses_surrogates = start <= SURROGATE_END && end >= SURROGATE_START;
161 if crosses_surrogates {
162 return Err(format!(
163 "range U+{:X}..U+{:X} crosses surrogate gap (U+D800..U+DFFF)",
164 start, end
165 ));
166 }
167
168 let mut result = String::with_capacity(length * 4); for i in 0..length {
170 let codepoint = start + i as u32;
171 match char::from_u32(codepoint) {
172 Some(c) => result.push(c),
173 None => return Err(format!("invalid codepoint U+{:X}", codepoint)),
174 }
175 }
176
177 Ok(result)
178 }
179
180 pub fn effective_mode(&self) -> EncodingMode {
187 if let Some(mode) = &self.mode {
188 return mode.clone();
189 }
190
191 let len = if self.start_codepoint.is_some() {
193 return EncodingMode::ByteRange;
196 } else if let Some(length) = self.length {
197 length
199 } else {
200 self.chars.chars().count()
201 };
202
203 if len > 0 && len.is_power_of_two() {
204 EncodingMode::Chunked
205 } else {
206 EncodingMode::Radix
207 }
208 }
209}
210
/// Serde default provider for boolean fields that should be `true` when the
/// key is missing (referenced by name from `#[serde(default = "default_true")]`).
fn default_true() -> bool {
    true
}
214
215#[derive(Debug, Deserialize)]
217pub struct DictionaryRegistry {
218 pub dictionaries: HashMap<String, DictionaryConfig>,
220 #[serde(default)]
222 pub compression: HashMap<String, CompressionConfig>,
223 #[serde(default)]
225 pub settings: Settings,
226}
227
228#[derive(Debug, Deserialize, Clone)]
230pub struct CompressionConfig {
231 pub default_level: u32,
233}
234
235#[derive(Debug, Deserialize, Clone, Default)]
237pub struct XxHashSettings {
238 #[serde(default)]
240 pub default_seed: u64,
241 #[serde(default)]
243 pub default_secret_file: Option<String>,
244}
245
246#[derive(Debug, Deserialize, Clone, Default)]
248pub struct Settings {
249 #[serde(default)]
251 pub default_dictionary: Option<String>,
252 #[serde(default)]
254 pub xxhash: XxHashSettings,
255}
256
257impl DictionaryRegistry {
258 pub fn from_toml(content: &str) -> Result<Self, toml::de::Error> {
260 toml::from_str(content)
261 }
262
263 pub fn load_default() -> Result<Self, Box<dyn std::error::Error>> {
267 Ok(Self {
268 dictionaries: build_registry(),
269 compression: HashMap::new(),
270 settings: Settings::default(),
271 })
272 }
273
274 pub fn load_from_file(path: &std::path::Path) -> Result<Self, Box<dyn std::error::Error>> {
276 let content = std::fs::read_to_string(path)?;
277 Ok(Self::from_toml(&content)?)
278 }
279
280 pub fn load_with_overrides() -> Result<Self, Box<dyn std::error::Error>> {
289 let mut config = Self::load_default()?;
290
291 if let Some(config_dir) = dirs::config_dir() {
293 let user_config_path = config_dir.join("base-d").join("dictionaries.toml");
294 if user_config_path.exists() {
295 match Self::load_from_file(&user_config_path) {
296 Ok(user_config) => {
297 config.merge(user_config);
298 }
299 Err(e) => {
300 eprintln!(
301 "Warning: Failed to load user config from {:?}: {}",
302 user_config_path, e
303 );
304 }
305 }
306 }
307 }
308
309 let local_config_path = std::path::Path::new("dictionaries.toml");
311 if local_config_path.exists() {
312 match Self::load_from_file(local_config_path) {
313 Ok(local_config) => {
314 config.merge(local_config);
315 }
316 Err(e) => {
317 eprintln!(
318 "Warning: Failed to load local config from {:?}: {}",
319 local_config_path, e
320 );
321 }
322 }
323 }
324
325 Ok(config)
326 }
327
328 pub fn merge(&mut self, other: DictionaryRegistry) {
332 for (name, dictionary) in other.dictionaries {
333 self.dictionaries.insert(name, dictionary);
334 }
335 }
336
337 pub fn get_dictionary(&self, name: &str) -> Option<&DictionaryConfig> {
339 self.dictionaries.get(name)
340 }
341
342 pub fn dictionary(
360 &self,
361 name: &str,
362 ) -> Result<crate::Dictionary, crate::encoders::algorithms::errors::DictionaryNotFoundError>
363 {
364 let config = self.get_dictionary(name).ok_or_else(|| {
365 crate::encoders::algorithms::errors::DictionaryNotFoundError::new(name)
366 })?;
367
368 self.build_dictionary(config).map_err(|e| {
369 crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(name, e)
370 })
371 }
372
373 pub fn random(&self) -> Result<(String, crate::Dictionary), Box<dyn std::error::Error>> {
389 use rand::seq::IteratorRandom;
390
391 let common_names: Vec<&String> = self
392 .dictionaries
393 .iter()
394 .filter(|(_, config)| {
395 config.common && config.dictionary_type == DictionaryType::Char
397 })
398 .map(|(name, _)| name)
399 .collect();
400
401 let name = common_names
402 .into_iter()
403 .choose(&mut rand::rng())
404 .ok_or("No common dictionaries available")?;
405
406 let dict = self.dictionary(name)?;
407 Ok((name.clone(), dict))
408 }
409
410 pub fn names(&self) -> Vec<&str> {
412 self.dictionaries.keys().map(|s| s.as_str()).collect()
413 }
414
415 pub fn common_names(&self) -> Vec<&str> {
417 self.dictionaries
418 .iter()
419 .filter(|(_, config)| config.common)
420 .map(|(name, _)| name.as_str())
421 .collect()
422 }
423
424 fn build_dictionary(&self, config: &DictionaryConfig) -> Result<crate::Dictionary, String> {
426 use crate::core::config::EncodingMode;
427
428 let mode = config.effective_mode();
429
430 if mode == EncodingMode::ByteRange {
432 let start = config
433 .start_codepoint
434 .ok_or("ByteRange mode requires start_codepoint")?;
435 return crate::Dictionary::builder()
436 .mode(mode)
437 .start_codepoint(start)
438 .build();
439 }
440
441 let chars_str = config.effective_chars()?;
443 let chars: Vec<char> = chars_str.chars().collect();
444
445 let mut builder = crate::Dictionary::builder().chars(chars).mode(mode);
447
448 if let Some(pad_str) = &config.padding
449 && let Some(pad_char) = pad_str.chars().next()
450 {
451 builder = builder.padding(pad_char);
452 }
453
454 builder.build()
455 }
456
457 pub fn word_dictionary(
478 &self,
479 name: &str,
480 ) -> Result<crate::WordDictionary, crate::encoders::algorithms::errors::DictionaryNotFoundError>
481 {
482 let config = self.get_dictionary(name).ok_or_else(|| {
483 crate::encoders::algorithms::errors::DictionaryNotFoundError::new(name)
484 })?;
485
486 if config.dictionary_type != DictionaryType::Word {
488 return Err(
489 crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(
490 name,
491 format!(
492 "Dictionary '{}' is not a word dictionary (type is {:?})",
493 name, config.dictionary_type
494 ),
495 ),
496 );
497 }
498
499 self.build_word_dictionary(config).map_err(|e| {
500 crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(name, e)
501 })
502 }
503
504 fn build_word_dictionary(
506 &self,
507 config: &DictionaryConfig,
508 ) -> Result<crate::WordDictionary, String> {
509 let mut builder = crate::WordDictionary::builder();
510
511 if let Some(ref words) = config.words {
513 builder = builder.words(words.clone());
514 } else if let Some(ref words_file) = config.words_file {
515 let content = if let Some(embedded) = get_embedded_wordlist(words_file) {
517 embedded.to_string()
518 } else {
519 match words_file.as_str() {
521 "builtin:bip39" | "builtin:bip39-english" => {
522 crate::wordlists::BIP39_ENGLISH.to_string()
523 }
524 "builtin:eff_long" | "builtin:eff-long" => {
525 crate::wordlists::EFF_LONG.to_string()
526 }
527 "builtin:eff_short1" | "builtin:eff-short1" => {
528 crate::wordlists::EFF_SHORT1.to_string()
529 }
530 "builtin:eff_short2" | "builtin:eff-short2" => {
531 crate::wordlists::EFF_SHORT2.to_string()
532 }
533 "builtin:diceware" => crate::wordlists::DICEWARE.to_string(),
534 "builtin:pgp_even" | "builtin:pgp-even" => {
535 crate::wordlists::PGP_EVEN.to_string()
536 }
537 "builtin:pgp_odd" | "builtin:pgp-odd" => crate::wordlists::PGP_ODD.to_string(),
538 "builtin:nato" => crate::wordlists::NATO.to_string(),
539 "builtin:buzzwords" => crate::wordlists::BUZZWORDS.to_string(),
540 "builtin:klingon" => crate::wordlists::KLINGON.to_string(),
541 "builtin:pokemon" => crate::wordlists::POKEMON.to_string(),
542 _ => {
543 let expanded = shellexpand::tilde(words_file);
545 std::fs::read_to_string(expanded.as_ref()).map_err(|e| {
546 format!("Failed to read words file '{}': {}", words_file, e)
547 })?
548 }
549 }
550 };
551 builder = builder.words_from_str(&content);
552 } else {
553 return Err("Word dictionary must have 'words' or 'words_file'".to_string());
554 }
555
556 if let Some(ref delimiter) = config.delimiter {
558 builder = builder.delimiter(delimiter.clone());
559 }
560
561 if let Some(case_sensitive) = config.case_sensitive {
563 builder = builder.case_sensitive(case_sensitive);
564 }
565
566 builder.build()
567 }
568
569 pub fn dictionary_type(&self, name: &str) -> Option<DictionaryType> {
573 self.get_dictionary(name).map(|c| c.dictionary_type.clone())
574 }
575
576 pub fn is_word_dictionary(&self, name: &str) -> bool {
578 self.dictionary_type(name) == Some(DictionaryType::Word)
579 }
580}
581
#[cfg(test)]
mod tests {
    use super::*;

    /// Built-in registry shared by the tests that inspect shipped dictionaries.
    fn builtin() -> DictionaryRegistry {
        DictionaryRegistry::load_default().unwrap()
    }

    /// Registry with no dictionaries, no compression and default settings.
    fn empty_registry() -> DictionaryRegistry {
        DictionaryRegistry {
            dictionaries: HashMap::new(),
            compression: HashMap::new(),
            settings: Settings::default(),
        }
    }

    /// Entry defined by an explicit alphabet.
    fn from_chars(chars: &str) -> DictionaryConfig {
        DictionaryConfig {
            chars: chars.to_string(),
            ..Default::default()
        }
    }

    /// Entry defined by a start character and a length.
    fn from_range(start: &str, length: usize) -> DictionaryConfig {
        DictionaryConfig {
            start: Some(start.to_string()),
            length: Some(length),
            ..Default::default()
        }
    }

    /// Entry with an explicit alphabet and an explicit `Radix` mode.
    fn radix(chars: &str) -> DictionaryConfig {
        DictionaryConfig {
            mode: Some(EncodingMode::Radix),
            ..from_chars(chars)
        }
    }

    #[test]
    fn test_load_default_config() {
        let registry = builtin();
        assert!(registry.dictionaries.contains_key("cards"));
    }

    #[test]
    fn test_cards_dictionary_length() {
        let registry = builtin();
        let cards = registry.get_dictionary("cards").unwrap();
        assert_eq!(cards.chars.chars().count(), 52);
    }

    #[test]
    fn test_base64_chunked_mode() {
        let registry = builtin();
        let base64 = registry.get_dictionary("base64").unwrap();
        assert_eq!(base64.effective_mode(), EncodingMode::Chunked);
        assert_eq!(base64.padding, Some("=".to_string()));
    }

    #[test]
    fn test_base64_radix_mode() {
        let registry = builtin();
        let base64_radix = registry.get_dictionary("base64_radix").unwrap();
        assert_eq!(base64_radix.effective_mode(), EncodingMode::Radix);
    }

    #[test]
    fn test_auto_detection_power_of_two() {
        // Four symbols: a power of two.
        assert_eq!(from_chars("ABCD").effective_mode(), EncodingMode::Chunked);
        // Three symbols: not a power of two.
        assert_eq!(from_chars("ABC").effective_mode(), EncodingMode::Radix);
    }

    #[test]
    fn test_explicit_mode_override() {
        // Would auto-detect as Chunked, but the explicit mode wins.
        assert_eq!(radix("ABCD").effective_mode(), EncodingMode::Radix);
    }

    #[test]
    fn test_merge_configs() {
        let mut base = empty_registry();
        base.dictionaries.insert("test1".to_string(), radix("ABC"));

        let mut overrides = empty_registry();
        overrides
            .dictionaries
            .insert("test2".to_string(), radix("XYZ"));
        overrides
            .dictionaries
            .insert("test1".to_string(), radix("DEF"));

        base.merge(overrides);

        assert_eq!(base.dictionaries.len(), 2);
        assert_eq!(base.get_dictionary("test1").unwrap().chars, "DEF");
        assert_eq!(base.get_dictionary("test2").unwrap().chars, "XYZ");
    }

    #[test]
    fn test_load_from_toml_string() {
        let toml_content = r#"
[dictionaries.custom]
chars = "0123456789"
mode = "base_conversion"
"#;
        let registry = DictionaryRegistry::from_toml(toml_content).unwrap();
        assert!(registry.dictionaries.contains_key("custom"));
        assert_eq!(
            registry.get_dictionary("custom").unwrap().chars,
            "0123456789"
        );
    }

    #[test]
    fn test_effective_chars_from_explicit() {
        assert_eq!(from_chars("ABCD").effective_chars().unwrap(), "ABCD");
    }

    #[test]
    fn test_effective_chars_from_range() {
        assert_eq!(from_range("A", 4).effective_chars().unwrap(), "ABCD");
    }

    #[test]
    fn test_effective_chars_explicit_takes_priority() {
        let config = DictionaryConfig {
            chars: "XYZ".to_string(),
            ..from_range("A", 4)
        };
        assert_eq!(config.effective_chars().unwrap(), "XYZ");
    }

    #[test]
    fn test_effective_chars_unicode_range() {
        let generated = from_range("가", 4).effective_chars().unwrap();
        assert_eq!(generated.chars().count(), 4);
        assert_eq!(generated, "가각갂갃");
    }

    #[test]
    fn test_effective_chars_surrogate_gap_error() {
        // U+D700 + 511 = U+D8FF, which lies inside the surrogate gap.
        let config = from_range("\u{D700}", 512);
        assert!(config.effective_chars().is_err());
    }

    #[test]
    fn test_effective_chars_exceeds_unicode_max() {
        // U+10FFFE + 9 runs past U+10FFFF.
        let config = from_range("\u{10FFFE}", 10);
        assert!(config.effective_chars().is_err());
    }

    #[test]
    fn test_effective_mode_with_length_field() {
        assert_eq!(from_range("A", 64).effective_mode(), EncodingMode::Chunked);
        assert_eq!(from_range("A", 52).effective_mode(), EncodingMode::Radix);
    }
}