1use serde::Deserialize;
2use std::collections::HashMap;
3
// Textually includes `registry.rs` from the build output directory (`OUT_DIR`).
// NOTE(review): `build_registry` and `get_embedded_wordlist` are called in this file but not
// defined in it; presumably they come from this generated source. Confirm against the
// build script.
include!(concat!(env!("OUT_DIR"), "/registry.rs"));
6
7#[derive(Debug, Clone, Deserialize, PartialEq, Eq, Default)]
9#[serde(rename_all = "snake_case")]
10pub enum DictionaryType {
11 #[default]
13 Char,
14 Word,
16}
17
18#[derive(Debug, Clone, Deserialize, PartialEq, Eq)]
23#[serde(rename_all = "snake_case")]
24#[derive(Default)]
25pub enum EncodingMode {
26 #[default]
30 #[serde(alias = "base_conversion")]
31 Radix,
32 Chunked,
35 ByteRange,
38}
39
40#[derive(Debug, Deserialize, Clone)]
42pub struct DictionaryConfig {
43 #[serde(default, rename = "type")]
46 pub dictionary_type: DictionaryType,
47
48 #[serde(default)]
51 pub chars: String,
52 #[serde(default)]
55 pub start: Option<String>,
56 #[serde(default)]
59 pub length: Option<usize>,
60 #[serde(default)]
62 pub start_codepoint: Option<u32>,
63
64 #[serde(default)]
67 pub words: Option<Vec<String>>,
68 #[serde(default)]
70 pub words_file: Option<String>,
71 #[serde(default)]
73 pub delimiter: Option<String>,
74 #[serde(default)]
76 pub case_sensitive: Option<bool>,
77 #[serde(default)]
79 pub alternating: Option<Vec<String>>,
80
81 #[serde(default)]
84 pub mode: Option<EncodingMode>,
85 #[serde(default)]
87 pub padding: Option<String>,
88 #[serde(default = "default_true")]
91 pub common: bool,
92}
93
94impl Default for DictionaryConfig {
95 fn default() -> Self {
96 Self {
97 dictionary_type: DictionaryType::default(),
98 chars: String::new(),
99 start: None,
100 length: None,
101 start_codepoint: None,
102 words: None,
103 words_file: None,
104 delimiter: None,
105 case_sensitive: None,
106 alternating: None,
107 mode: None,
108 padding: None,
109 common: true, }
111 }
112}
113
114impl DictionaryConfig {
115 pub fn effective_chars(&self) -> Result<String, String> {
122 if !self.chars.is_empty() {
124 return Ok(self.chars.clone());
125 }
126
127 if let (Some(start_str), Some(length)) = (&self.start, self.length) {
129 let start_char = start_str
130 .chars()
131 .next()
132 .ok_or("start must contain at least one character")?;
133 let start_codepoint = start_char as u32;
134
135 return Self::generate_range(start_codepoint, length);
136 }
137
138 Ok(String::new())
140 }
141
142 fn generate_range(start: u32, length: usize) -> Result<String, String> {
144 const MAX_UNICODE: u32 = 0x10FFFF;
145 const SURROGATE_START: u32 = 0xD800;
146 const SURROGATE_END: u32 = 0xDFFF;
147
148 if length == 0 {
149 return Err("length must be greater than 0".to_string());
150 }
151
152 let end = start
153 .checked_add(length as u32 - 1)
154 .ok_or("range exceeds maximum Unicode codepoint")?;
155
156 if end > MAX_UNICODE {
157 return Err(format!(
158 "range end U+{:X} exceeds maximum Unicode codepoint U+{:X}",
159 end, MAX_UNICODE
160 ));
161 }
162
163 let crosses_surrogates = start <= SURROGATE_END && end >= SURROGATE_START;
165 if crosses_surrogates {
166 return Err(format!(
167 "range U+{:X}..U+{:X} crosses surrogate gap (U+D800..U+DFFF)",
168 start, end
169 ));
170 }
171
172 let mut result = String::with_capacity(length * 4); for i in 0..length {
174 let codepoint = start + i as u32;
175 match char::from_u32(codepoint) {
176 Some(c) => result.push(c),
177 None => return Err(format!("invalid codepoint U+{:X}", codepoint)),
178 }
179 }
180
181 Ok(result)
182 }
183
184 pub fn effective_mode(&self) -> EncodingMode {
191 if let Some(mode) = &self.mode {
192 return mode.clone();
193 }
194
195 let len = if self.start_codepoint.is_some() {
197 return EncodingMode::ByteRange;
200 } else if let Some(length) = self.length {
201 length
203 } else {
204 self.chars.chars().count()
205 };
206
207 if len > 0 && len.is_power_of_two() {
208 EncodingMode::Chunked
209 } else {
210 EncodingMode::Radix
211 }
212 }
213}
214
/// Serde default helper for boolean fields that should be `true` when omitted.
fn default_true() -> bool {
    true
}
218
219#[derive(Debug, Deserialize)]
221pub struct DictionaryRegistry {
222 pub dictionaries: HashMap<String, DictionaryConfig>,
224 #[serde(default)]
226 pub compression: HashMap<String, CompressionConfig>,
227 #[serde(default)]
229 pub settings: Settings,
230}
231
232#[derive(Debug, Deserialize, Clone)]
234pub struct CompressionConfig {
235 pub default_level: u32,
237}
238
239#[derive(Debug, Deserialize, Clone, Default)]
241pub struct XxHashSettings {
242 #[serde(default)]
244 pub default_seed: u64,
245 #[serde(default)]
247 pub default_secret_file: Option<String>,
248}
249
250#[derive(Debug, Deserialize, Clone, Default)]
252pub struct Settings {
253 #[serde(default)]
255 pub default_dictionary: Option<String>,
256 #[serde(default)]
258 pub xxhash: XxHashSettings,
259}
260
261impl DictionaryRegistry {
262 pub fn from_toml(content: &str) -> Result<Self, toml::de::Error> {
264 toml::from_str(content)
265 }
266
267 pub fn load_default() -> Result<Self, Box<dyn std::error::Error>> {
271 Ok(Self {
272 dictionaries: build_registry(),
273 compression: HashMap::new(),
274 settings: Settings::default(),
275 })
276 }
277
278 pub fn load_from_file(path: &std::path::Path) -> Result<Self, Box<dyn std::error::Error>> {
280 let content = std::fs::read_to_string(path)?;
281 Ok(Self::from_toml(&content)?)
282 }
283
284 pub fn load_with_overrides() -> Result<Self, Box<dyn std::error::Error>> {
293 let mut config = Self::load_default()?;
294
295 if let Some(config_dir) = dirs::config_dir() {
297 let user_config_path = config_dir.join("base-d").join("dictionaries.toml");
298 if user_config_path.exists() {
299 match Self::load_from_file(&user_config_path) {
300 Ok(user_config) => {
301 config.merge(user_config);
302 }
303 Err(e) => {
304 eprintln!(
305 "Warning: Failed to load user config from {:?}: {}",
306 user_config_path, e
307 );
308 }
309 }
310 }
311 }
312
313 let local_config_path = std::path::Path::new("dictionaries.toml");
315 if local_config_path.exists() {
316 match Self::load_from_file(local_config_path) {
317 Ok(local_config) => {
318 config.merge(local_config);
319 }
320 Err(e) => {
321 eprintln!(
322 "Warning: Failed to load local config from {:?}: {}",
323 local_config_path, e
324 );
325 }
326 }
327 }
328
329 Ok(config)
330 }
331
332 pub fn merge(&mut self, other: DictionaryRegistry) {
336 for (name, dictionary) in other.dictionaries {
337 self.dictionaries.insert(name, dictionary);
338 }
339 }
340
341 pub fn get_dictionary(&self, name: &str) -> Option<&DictionaryConfig> {
343 self.dictionaries.get(name)
344 }
345
346 pub fn dictionary(
364 &self,
365 name: &str,
366 ) -> Result<crate::Dictionary, crate::encoders::algorithms::errors::DictionaryNotFoundError>
367 {
368 let config = self.get_dictionary(name).ok_or_else(|| {
369 crate::encoders::algorithms::errors::DictionaryNotFoundError::new(name)
370 })?;
371
372 self.build_dictionary(config).map_err(|e| {
373 crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(name, e)
374 })
375 }
376
377 pub fn random(&self) -> Result<(String, crate::Dictionary), Box<dyn std::error::Error>> {
393 use crate::core::dictionary::is_safe_byte_range;
394 use rand::seq::IteratorRandom;
395
396 let common_names: Vec<&String> = self
397 .dictionaries
398 .iter()
399 .filter(|(_, config)| {
400 if !config.common || config.dictionary_type != DictionaryType::Char {
402 return false;
403 }
404
405 if config.effective_mode() == EncodingMode::ByteRange {
408 if let Some(start) = config.start_codepoint {
409 return is_safe_byte_range(start);
410 }
411 return false; }
413
414 true
415 })
416 .map(|(name, _)| name)
417 .collect();
418
419 let name = common_names
420 .into_iter()
421 .choose(&mut rand::rng())
422 .ok_or("No common dictionaries available")?;
423
424 let dict = self.dictionary(name)?;
425 Ok((name.clone(), dict))
426 }
427
428 pub fn names(&self) -> Vec<&str> {
430 self.dictionaries.keys().map(|s| s.as_str()).collect()
431 }
432
433 pub fn common_names(&self) -> Vec<&str> {
438 use crate::core::dictionary::is_safe_byte_range;
439
440 self.dictionaries
441 .iter()
442 .filter(|(_, config)| {
443 if !config.common || config.dictionary_type != DictionaryType::Char {
444 return false;
445 }
446
447 if config.effective_mode() == EncodingMode::ByteRange {
449 if let Some(start) = config.start_codepoint {
450 return is_safe_byte_range(start);
451 }
452 return false;
453 }
454
455 true
456 })
457 .map(|(name, _)| name.as_str())
458 .collect()
459 }
460
461 fn build_dictionary(&self, config: &DictionaryConfig) -> Result<crate::Dictionary, String> {
463 use crate::core::config::EncodingMode;
464
465 let mode = config.effective_mode();
466
467 if mode == EncodingMode::ByteRange {
469 let start = config
470 .start_codepoint
471 .ok_or("ByteRange mode requires start_codepoint")?;
472 return crate::Dictionary::builder()
473 .mode(mode)
474 .start_codepoint(start)
475 .build();
476 }
477
478 let chars_str = config.effective_chars()?;
480 let chars: Vec<char> = chars_str.chars().collect();
481
482 let mut builder = crate::Dictionary::builder().chars(chars).mode(mode);
484
485 if let Some(pad_str) = &config.padding
486 && let Some(pad_char) = pad_str.chars().next()
487 {
488 builder = builder.padding(pad_char);
489 }
490
491 builder.build()
492 }
493
494 pub fn word_dictionary(
515 &self,
516 name: &str,
517 ) -> Result<crate::WordDictionary, crate::encoders::algorithms::errors::DictionaryNotFoundError>
518 {
519 let config = self.get_dictionary(name).ok_or_else(|| {
520 crate::encoders::algorithms::errors::DictionaryNotFoundError::new(name)
521 })?;
522
523 if config.dictionary_type != DictionaryType::Word {
525 return Err(
526 crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(
527 name,
528 format!(
529 "Dictionary '{}' is not a word dictionary (type is {:?})",
530 name, config.dictionary_type
531 ),
532 ),
533 );
534 }
535
536 self.build_word_dictionary(config).map_err(|e| {
537 crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(name, e)
538 })
539 }
540
541 fn build_word_dictionary(
543 &self,
544 config: &DictionaryConfig,
545 ) -> Result<crate::WordDictionary, String> {
546 let mut builder = crate::WordDictionary::builder();
547
548 if let Some(ref words) = config.words {
550 builder = builder.words(words.clone());
551 } else if let Some(ref words_file) = config.words_file {
552 let content = if let Some(embedded) = get_embedded_wordlist(words_file) {
554 embedded.to_string()
555 } else {
556 match words_file.as_str() {
558 "builtin:bip39" | "builtin:bip39-english" => {
559 crate::wordlists::BIP39_ENGLISH.to_string()
560 }
561 "builtin:eff_long" | "builtin:eff-long" => {
562 crate::wordlists::EFF_LONG.to_string()
563 }
564 "builtin:eff_short1" | "builtin:eff-short1" => {
565 crate::wordlists::EFF_SHORT1.to_string()
566 }
567 "builtin:eff_short2" | "builtin:eff-short2" => {
568 crate::wordlists::EFF_SHORT2.to_string()
569 }
570 "builtin:diceware" => crate::wordlists::DICEWARE.to_string(),
571 "builtin:pgp_even" | "builtin:pgp-even" => {
572 crate::wordlists::PGP_EVEN.to_string()
573 }
574 "builtin:pgp_odd" | "builtin:pgp-odd" => crate::wordlists::PGP_ODD.to_string(),
575 "builtin:nato" => crate::wordlists::NATO.to_string(),
576 "builtin:buzzwords" => crate::wordlists::BUZZWORDS.to_string(),
577 "builtin:klingon" => crate::wordlists::KLINGON.to_string(),
578 "builtin:pokemon" => crate::wordlists::POKEMON.to_string(),
579 _ => {
580 let expanded = shellexpand::tilde(words_file);
582 std::fs::read_to_string(expanded.as_ref()).map_err(|e| {
583 format!("Failed to read words file '{}': {}", words_file, e)
584 })?
585 }
586 }
587 };
588 builder = builder.words_from_str(&content);
589 } else {
590 return Err("Word dictionary must have 'words' or 'words_file'".to_string());
591 }
592
593 if let Some(ref delimiter) = config.delimiter {
595 builder = builder.delimiter(delimiter.clone());
596 }
597
598 if let Some(case_sensitive) = config.case_sensitive {
600 builder = builder.case_sensitive(case_sensitive);
601 }
602
603 builder.build()
604 }
605
606 pub fn alternating_word_dictionary(
629 &self,
630 name: &str,
631 ) -> Result<
632 crate::AlternatingWordDictionary,
633 crate::encoders::algorithms::errors::DictionaryNotFoundError,
634 > {
635 let config = self.get_dictionary(name).ok_or_else(|| {
636 crate::encoders::algorithms::errors::DictionaryNotFoundError::new(name)
637 })?;
638
639 if config.dictionary_type != DictionaryType::Word {
641 return Err(
642 crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(
643 name,
644 format!(
645 "Dictionary '{}' is not a word dictionary (type is {:?})",
646 name, config.dictionary_type
647 ),
648 ),
649 );
650 }
651
652 let alternating_names = config.alternating.as_ref().ok_or_else(|| {
654 crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(
655 name,
656 format!(
657 "Dictionary '{}' is not an alternating dictionary (missing 'alternating' field)",
658 name
659 ),
660 )
661 })?;
662
663 self.build_alternating_word_dictionary(config, alternating_names)
664 .map_err(|e| {
665 crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(name, e)
666 })
667 }
668
669 fn build_alternating_word_dictionary(
671 &self,
672 config: &DictionaryConfig,
673 alternating_names: &[String],
674 ) -> Result<crate::AlternatingWordDictionary, String> {
675 if alternating_names.is_empty() {
676 return Err("Alternating dictionary must have at least one sub-dictionary".to_string());
677 }
678
679 let mut dictionaries = Vec::with_capacity(alternating_names.len());
681 for dict_name in alternating_names {
682 let sub_dict = self
683 .word_dictionary(dict_name)
684 .map_err(|e| format!("Failed to load sub-dictionary '{}': {}", dict_name, e))?;
685 dictionaries.push(sub_dict);
686 }
687
688 let delimiter = config.delimiter.clone().unwrap_or_else(|| " ".to_string());
690 Ok(crate::AlternatingWordDictionary::new(
693 dictionaries,
694 delimiter,
695 ))
696 }
697
698 pub fn dictionary_type(&self, name: &str) -> Option<DictionaryType> {
702 self.get_dictionary(name).map(|c| c.dictionary_type.clone())
703 }
704
705 pub fn is_word_dictionary(&self, name: &str) -> bool {
707 self.dictionary_type(name) == Some(DictionaryType::Word)
708 }
709}
710
#[cfg(test)]
mod tests {
    use super::*;

    /// Registry with no dictionaries, no compression entries and default settings.
    fn empty_registry() -> DictionaryRegistry {
        DictionaryRegistry {
            dictionaries: HashMap::new(),
            compression: HashMap::new(),
            settings: Settings::default(),
        }
    }

    /// Config with an explicit alphabet and an explicit `Radix` mode.
    fn radix_config(chars: &str) -> DictionaryConfig {
        DictionaryConfig {
            chars: chars.to_string(),
            mode: Some(EncodingMode::Radix),
            ..Default::default()
        }
    }

    /// Config whose alphabet is defined by a start character and a length.
    fn range_config(start: &str, length: usize) -> DictionaryConfig {
        DictionaryConfig {
            start: Some(start.to_string()),
            length: Some(length),
            ..Default::default()
        }
    }

    /// Config with an explicit alphabet and everything else left at its default.
    fn chars_config(chars: &str) -> DictionaryConfig {
        DictionaryConfig {
            chars: chars.to_string(),
            ..Default::default()
        }
    }

    #[test]
    fn test_load_default_config() {
        let registry = DictionaryRegistry::load_default().unwrap();
        assert!(registry.dictionaries.contains_key("cards"));
    }

    #[test]
    fn test_cards_dictionary_length() {
        let registry = DictionaryRegistry::load_default().unwrap();
        let cards = registry.get_dictionary("cards").unwrap();
        assert_eq!(cards.chars.chars().count(), 52);
    }

    #[test]
    fn test_base64_chunked_mode() {
        let registry = DictionaryRegistry::load_default().unwrap();
        let base64 = registry.get_dictionary("base64").unwrap();
        assert_eq!(base64.effective_mode(), EncodingMode::Chunked);
        assert_eq!(base64.padding, Some("=".to_string()));
    }

    #[test]
    fn test_base64_radix_mode() {
        let registry = DictionaryRegistry::load_default().unwrap();
        let base64_radix = registry.get_dictionary("base64_radix").unwrap();
        assert_eq!(base64_radix.effective_mode(), EncodingMode::Radix);
    }

    #[test]
    fn test_auto_detection_power_of_two() {
        // Four symbols: a power of two, so chunked encoding is selected.
        let four_symbols = chars_config("ABCD");
        assert_eq!(four_symbols.effective_mode(), EncodingMode::Chunked);

        // Three symbols: not a power of two, so radix encoding is selected.
        let three_symbols = chars_config("ABC");
        assert_eq!(three_symbols.effective_mode(), EncodingMode::Radix);
    }

    #[test]
    fn test_explicit_mode_override() {
        // An explicit mode wins over auto-detection, even for a power-of-two alphabet.
        let config = radix_config("ABCD");
        assert_eq!(config.effective_mode(), EncodingMode::Radix);
    }

    #[test]
    fn test_merge_configs() {
        let mut base = empty_registry();
        base.dictionaries
            .insert("test1".to_string(), radix_config("ABC"));

        let mut overrides = empty_registry();
        overrides
            .dictionaries
            .insert("test2".to_string(), radix_config("XYZ"));
        overrides
            .dictionaries
            .insert("test1".to_string(), radix_config("DEF"));

        base.merge(overrides);

        // `test1` is replaced by the override, `test2` is added.
        assert_eq!(base.dictionaries.len(), 2);
        assert_eq!(base.get_dictionary("test1").unwrap().chars, "DEF");
        assert_eq!(base.get_dictionary("test2").unwrap().chars, "XYZ");
    }

    #[test]
    fn test_load_from_toml_string() {
        let toml_content = r#"
[dictionaries.custom]
chars = "0123456789"
mode = "base_conversion"
"#;
        let registry = DictionaryRegistry::from_toml(toml_content).unwrap();
        assert!(registry.dictionaries.contains_key("custom"));
        assert_eq!(
            registry.get_dictionary("custom").unwrap().chars,
            "0123456789"
        );
    }

    #[test]
    fn test_effective_chars_from_explicit() {
        let config = chars_config("ABCD");
        assert_eq!(config.effective_chars().unwrap(), "ABCD");
    }

    #[test]
    fn test_effective_chars_from_range() {
        let config = range_config("A", 4);
        assert_eq!(config.effective_chars().unwrap(), "ABCD");
    }

    #[test]
    fn test_effective_chars_explicit_takes_priority() {
        // Explicit `chars` wins over a `start`/`length` range.
        let config = DictionaryConfig {
            chars: "XYZ".to_string(),
            ..range_config("A", 4)
        };
        assert_eq!(config.effective_chars().unwrap(), "XYZ");
    }

    #[test]
    fn test_effective_chars_unicode_range() {
        let config = range_config("가", 4);
        let generated = config.effective_chars().unwrap();
        assert_eq!(generated.chars().count(), 4);
        assert_eq!(generated, "가각갂갃");
    }

    #[test]
    fn test_effective_chars_surrogate_gap_error() {
        // U+D700 + 512 characters would run into the surrogate gap starting at U+D800.
        let config = range_config("\u{D700}", 512);
        assert!(config.effective_chars().is_err());
    }

    #[test]
    fn test_effective_chars_exceeds_unicode_max() {
        // U+10FFFE + 10 characters would run past U+10FFFF.
        let config = range_config("\u{10FFFE}", 10);
        assert!(config.effective_chars().is_err());
    }

    #[test]
    fn test_effective_mode_with_length_field() {
        // 64 is a power of two.
        let power_of_two = range_config("A", 64);
        assert_eq!(power_of_two.effective_mode(), EncodingMode::Chunked);

        // 52 is not.
        let not_power_of_two = range_config("A", 52);
        assert_eq!(not_power_of_two.effective_mode(), EncodingMode::Radix);
    }
}