1use serde::Deserialize;
2use std::collections::HashMap;
3
// Textually includes the `registry.rs` file found in the build output directory (`OUT_DIR`).
// NOTE(review): `build_registry()` and `get_embedded_wordlist()` are used below but not defined
// in this file — presumably they come from this generated include; confirm against build.rs.
include!(concat!(env!("OUT_DIR"), "/registry.rs"));
6
/// Kind of dictionary a configuration entry describes.
///
/// Deserialized from snake_case variant names; [`DictionaryType::Char`] is the default.
#[derive(Debug, Clone, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum DictionaryType {
    /// Character-based dictionary (the default when `type` is omitted).
    #[default]
    Char,
    /// Word-based dictionary; the only type accepted by the word-dictionary lookups.
    Word,
}
17
/// Encoding strategy used by a character dictionary.
///
/// Deserialized from snake_case variant names; [`EncodingMode::Radix`] is the default.
#[derive(Debug, Clone, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
#[derive(Default)]
pub enum EncodingMode {
    /// Default mode; the legacy name `base_conversion` is accepted as an alias.
    #[default]
    #[serde(alias = "base_conversion")]
    Radix,
    /// Picked by `DictionaryConfig::effective_mode` when the alphabet size is a power of two.
    Chunked,
    /// Picked by `DictionaryConfig::effective_mode` when `start_codepoint` is set.
    ByteRange,
}
39
/// Configuration of a single dictionary entry, as deserialized from TOML.
///
/// Every field may be omitted in the input: each one has a serde default, and `common`
/// defaults to `true`.
#[derive(Debug, Deserialize, Clone)]
pub struct DictionaryConfig {
    /// Dictionary kind; read from the `type` key.
    #[serde(default, rename = "type")]
    pub dictionary_type: DictionaryType,

    /// Explicit alphabet. When non-empty it wins over `start`/`length` in `effective_chars`.
    #[serde(default)]
    pub chars: String,
    /// Start of a generated range; only the first character of the string is used.
    #[serde(default)]
    pub start: Option<String>,
    /// Number of consecutive code points to generate from `start`; also consulted by
    /// `effective_mode` when auto-detecting the mode.
    #[serde(default)]
    pub length: Option<usize>,
    /// Start code point handed to the builder in `ByteRange` mode. When no explicit `mode`
    /// is given, its presence makes `effective_mode` return `ByteRange`.
    #[serde(default)]
    pub start_codepoint: Option<u32>,

    /// Inline word list for word dictionaries; takes priority over `words_file`.
    #[serde(default)]
    pub words: Option<Vec<String>>,
    /// Word list source used when `words` is absent: a name known to
    /// `get_embedded_wordlist`, a `builtin:*` identifier, or a file path (`~` is expanded).
    #[serde(default)]
    pub words_file: Option<String>,
    /// Delimiter passed to the word-dictionary builder; alternating dictionaries fall back
    /// to a single space when it is unset.
    #[serde(default)]
    pub delimiter: Option<String>,
    /// Case-sensitivity flag passed to the word-dictionary builder when set.
    #[serde(default)]
    pub case_sensitive: Option<bool>,
    /// Names of the word dictionaries combined by `alternating_word_dictionary`.
    #[serde(default)]
    pub alternating: Option<Vec<String>>,

    /// Explicit encoding mode; when set it overrides auto-detection in `effective_mode`.
    #[serde(default)]
    pub mode: Option<EncodingMode>,
    /// Padding; only the first character of the string is passed to the builder.
    #[serde(default)]
    pub padding: Option<String>,
    /// Whether the entry is listed by `common_names` and eligible for `random`.
    #[serde(default = "default_true")]
    pub common: bool,
}
93
94impl Default for DictionaryConfig {
95 fn default() -> Self {
96 Self {
97 dictionary_type: DictionaryType::default(),
98 chars: String::new(),
99 start: None,
100 length: None,
101 start_codepoint: None,
102 words: None,
103 words_file: None,
104 delimiter: None,
105 case_sensitive: None,
106 alternating: None,
107 mode: None,
108 padding: None,
109 common: true, }
111 }
112}
113
114impl DictionaryConfig {
115 pub fn effective_chars(&self) -> Result<String, String> {
122 if !self.chars.is_empty() {
124 return Ok(self.chars.clone());
125 }
126
127 if let (Some(start_str), Some(length)) = (&self.start, self.length) {
129 let start_char = start_str
130 .chars()
131 .next()
132 .ok_or("start must contain at least one character")?;
133 let start_codepoint = start_char as u32;
134
135 return Self::generate_range(start_codepoint, length);
136 }
137
138 Ok(String::new())
140 }
141
142 fn generate_range(start: u32, length: usize) -> Result<String, String> {
144 const MAX_UNICODE: u32 = 0x10FFFF;
145 const SURROGATE_START: u32 = 0xD800;
146 const SURROGATE_END: u32 = 0xDFFF;
147
148 if length == 0 {
149 return Err("length must be greater than 0".to_string());
150 }
151
152 let end = start
153 .checked_add(length as u32 - 1)
154 .ok_or("range exceeds maximum Unicode codepoint")?;
155
156 if end > MAX_UNICODE {
157 return Err(format!(
158 "range end U+{:X} exceeds maximum Unicode codepoint U+{:X}",
159 end, MAX_UNICODE
160 ));
161 }
162
163 let crosses_surrogates = start <= SURROGATE_END && end >= SURROGATE_START;
165 if crosses_surrogates {
166 return Err(format!(
167 "range U+{:X}..U+{:X} crosses surrogate gap (U+D800..U+DFFF)",
168 start, end
169 ));
170 }
171
172 let mut result = String::with_capacity(length * 4); for i in 0..length {
174 let codepoint = start + i as u32;
175 match char::from_u32(codepoint) {
176 Some(c) => result.push(c),
177 None => return Err(format!("invalid codepoint U+{:X}", codepoint)),
178 }
179 }
180
181 Ok(result)
182 }
183
184 pub fn effective_mode(&self) -> EncodingMode {
191 if let Some(mode) = &self.mode {
192 return mode.clone();
193 }
194
195 let len = if self.start_codepoint.is_some() {
197 return EncodingMode::ByteRange;
200 } else if let Some(length) = self.length {
201 length
203 } else {
204 self.chars.chars().count()
205 };
206
207 if len > 0 && len.is_power_of_two() {
208 EncodingMode::Chunked
209 } else {
210 EncodingMode::Radix
211 }
212 }
213}
214
/// Serde default provider returning `true`; referenced by `#[serde(default = "default_true")]`
/// on `DictionaryConfig::common`.
fn default_true() -> bool {
    true
}
218
/// Collection of dictionary configurations plus optional compression and general settings.
///
/// Only `dictionaries` is required when deserializing; the other sections default to empty.
#[derive(Debug, Deserialize)]
pub struct DictionaryRegistry {
    /// Dictionary configurations keyed by dictionary name.
    pub dictionaries: HashMap<String, DictionaryConfig>,
    /// Compression configurations keyed by string (presumably the algorithm name — confirm).
    #[serde(default)]
    pub compression: HashMap<String, CompressionConfig>,
    /// General settings; defaults to `Settings::default()` when the section is missing.
    #[serde(default)]
    pub settings: Settings,
}
231
/// Configuration for one entry of the registry's `compression` table.
#[derive(Debug, Deserialize, Clone)]
pub struct CompressionConfig {
    /// Required field; presumably the level used when none is given explicitly — confirm
    /// against the consumer of this value.
    pub default_level: u32,
}
238
/// xxHash-related settings; both fields are optional in the input.
#[derive(Debug, Deserialize, Clone, Default)]
pub struct XxHashSettings {
    /// Seed value; `0` when omitted.
    #[serde(default)]
    pub default_seed: u64,
    /// Optional secret-file location; `None` when omitted. (How the path is interpreted is
    /// decided by the consumer, which is not visible here.)
    #[serde(default)]
    pub default_secret_file: Option<String>,
}
249
/// General registry settings; every field is optional in the input.
#[derive(Debug, Deserialize, Clone, Default)]
pub struct Settings {
    /// Optional name of a default dictionary; `None` when omitted.
    #[serde(default)]
    pub default_dictionary: Option<String>,
    /// xxHash settings; defaults to `XxHashSettings::default()` when omitted.
    #[serde(default)]
    pub xxhash: XxHashSettings,
}
260
261impl DictionaryRegistry {
262 pub fn from_toml(content: &str) -> Result<Self, toml::de::Error> {
264 toml::from_str(content)
265 }
266
267 pub fn load_default() -> Result<Self, Box<dyn std::error::Error>> {
271 Ok(Self {
272 dictionaries: build_registry(),
273 compression: HashMap::new(),
274 settings: Settings::default(),
275 })
276 }
277
278 pub fn load_from_file(path: &std::path::Path) -> Result<Self, Box<dyn std::error::Error>> {
280 let content = std::fs::read_to_string(path)?;
281 Ok(Self::from_toml(&content)?)
282 }
283
284 pub fn load_with_overrides() -> Result<Self, Box<dyn std::error::Error>> {
293 let mut config = Self::load_default()?;
294
295 if let Some(config_dir) = dirs::config_dir() {
297 let user_config_path = config_dir.join("base-d").join("dictionaries.toml");
298 if user_config_path.exists() {
299 match Self::load_from_file(&user_config_path) {
300 Ok(user_config) => {
301 config.merge(user_config);
302 }
303 Err(e) => {
304 eprintln!(
305 "Warning: Failed to load user config from {:?}: {}",
306 user_config_path, e
307 );
308 }
309 }
310 }
311 }
312
313 let local_config_path = std::path::Path::new("dictionaries.toml");
315 if local_config_path.exists() {
316 match Self::load_from_file(local_config_path) {
317 Ok(local_config) => {
318 config.merge(local_config);
319 }
320 Err(e) => {
321 eprintln!(
322 "Warning: Failed to load local config from {:?}: {}",
323 local_config_path, e
324 );
325 }
326 }
327 }
328
329 Ok(config)
330 }
331
332 pub fn merge(&mut self, other: DictionaryRegistry) {
336 for (name, dictionary) in other.dictionaries {
337 self.dictionaries.insert(name, dictionary);
338 }
339 }
340
341 pub fn get_dictionary(&self, name: &str) -> Option<&DictionaryConfig> {
343 self.dictionaries.get(name)
344 }
345
346 pub fn dictionary(
364 &self,
365 name: &str,
366 ) -> Result<crate::Dictionary, crate::encoders::algorithms::errors::DictionaryNotFoundError>
367 {
368 let config = self.get_dictionary(name).ok_or_else(|| {
369 crate::encoders::algorithms::errors::DictionaryNotFoundError::new(name)
370 })?;
371
372 self.build_dictionary(config).map_err(|e| {
373 crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(name, e)
374 })
375 }
376
377 pub fn random(&self) -> Result<(String, crate::Dictionary), Box<dyn std::error::Error>> {
393 use rand::seq::IteratorRandom;
394
395 let common_names: Vec<&String> = self
396 .dictionaries
397 .iter()
398 .filter(|(_, config)| {
399 config.common && config.dictionary_type == DictionaryType::Char
401 })
402 .map(|(name, _)| name)
403 .collect();
404
405 let name = common_names
406 .into_iter()
407 .choose(&mut rand::rng())
408 .ok_or("No common dictionaries available")?;
409
410 let dict = self.dictionary(name)?;
411 Ok((name.clone(), dict))
412 }
413
414 pub fn names(&self) -> Vec<&str> {
416 self.dictionaries.keys().map(|s| s.as_str()).collect()
417 }
418
419 pub fn common_names(&self) -> Vec<&str> {
421 self.dictionaries
422 .iter()
423 .filter(|(_, config)| config.common)
424 .map(|(name, _)| name.as_str())
425 .collect()
426 }
427
428 fn build_dictionary(&self, config: &DictionaryConfig) -> Result<crate::Dictionary, String> {
430 use crate::core::config::EncodingMode;
431
432 let mode = config.effective_mode();
433
434 if mode == EncodingMode::ByteRange {
436 let start = config
437 .start_codepoint
438 .ok_or("ByteRange mode requires start_codepoint")?;
439 return crate::Dictionary::builder()
440 .mode(mode)
441 .start_codepoint(start)
442 .build();
443 }
444
445 let chars_str = config.effective_chars()?;
447 let chars: Vec<char> = chars_str.chars().collect();
448
449 let mut builder = crate::Dictionary::builder().chars(chars).mode(mode);
451
452 if let Some(pad_str) = &config.padding
453 && let Some(pad_char) = pad_str.chars().next()
454 {
455 builder = builder.padding(pad_char);
456 }
457
458 builder.build()
459 }
460
461 pub fn word_dictionary(
482 &self,
483 name: &str,
484 ) -> Result<crate::WordDictionary, crate::encoders::algorithms::errors::DictionaryNotFoundError>
485 {
486 let config = self.get_dictionary(name).ok_or_else(|| {
487 crate::encoders::algorithms::errors::DictionaryNotFoundError::new(name)
488 })?;
489
490 if config.dictionary_type != DictionaryType::Word {
492 return Err(
493 crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(
494 name,
495 format!(
496 "Dictionary '{}' is not a word dictionary (type is {:?})",
497 name, config.dictionary_type
498 ),
499 ),
500 );
501 }
502
503 self.build_word_dictionary(config).map_err(|e| {
504 crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(name, e)
505 })
506 }
507
508 fn build_word_dictionary(
510 &self,
511 config: &DictionaryConfig,
512 ) -> Result<crate::WordDictionary, String> {
513 let mut builder = crate::WordDictionary::builder();
514
515 if let Some(ref words) = config.words {
517 builder = builder.words(words.clone());
518 } else if let Some(ref words_file) = config.words_file {
519 let content = if let Some(embedded) = get_embedded_wordlist(words_file) {
521 embedded.to_string()
522 } else {
523 match words_file.as_str() {
525 "builtin:bip39" | "builtin:bip39-english" => {
526 crate::wordlists::BIP39_ENGLISH.to_string()
527 }
528 "builtin:eff_long" | "builtin:eff-long" => {
529 crate::wordlists::EFF_LONG.to_string()
530 }
531 "builtin:eff_short1" | "builtin:eff-short1" => {
532 crate::wordlists::EFF_SHORT1.to_string()
533 }
534 "builtin:eff_short2" | "builtin:eff-short2" => {
535 crate::wordlists::EFF_SHORT2.to_string()
536 }
537 "builtin:diceware" => crate::wordlists::DICEWARE.to_string(),
538 "builtin:pgp_even" | "builtin:pgp-even" => {
539 crate::wordlists::PGP_EVEN.to_string()
540 }
541 "builtin:pgp_odd" | "builtin:pgp-odd" => crate::wordlists::PGP_ODD.to_string(),
542 "builtin:nato" => crate::wordlists::NATO.to_string(),
543 "builtin:buzzwords" => crate::wordlists::BUZZWORDS.to_string(),
544 "builtin:klingon" => crate::wordlists::KLINGON.to_string(),
545 "builtin:pokemon" => crate::wordlists::POKEMON.to_string(),
546 _ => {
547 let expanded = shellexpand::tilde(words_file);
549 std::fs::read_to_string(expanded.as_ref()).map_err(|e| {
550 format!("Failed to read words file '{}': {}", words_file, e)
551 })?
552 }
553 }
554 };
555 builder = builder.words_from_str(&content);
556 } else {
557 return Err("Word dictionary must have 'words' or 'words_file'".to_string());
558 }
559
560 if let Some(ref delimiter) = config.delimiter {
562 builder = builder.delimiter(delimiter.clone());
563 }
564
565 if let Some(case_sensitive) = config.case_sensitive {
567 builder = builder.case_sensitive(case_sensitive);
568 }
569
570 builder.build()
571 }
572
573 pub fn alternating_word_dictionary(
596 &self,
597 name: &str,
598 ) -> Result<
599 crate::AlternatingWordDictionary,
600 crate::encoders::algorithms::errors::DictionaryNotFoundError,
601 > {
602 let config = self.get_dictionary(name).ok_or_else(|| {
603 crate::encoders::algorithms::errors::DictionaryNotFoundError::new(name)
604 })?;
605
606 if config.dictionary_type != DictionaryType::Word {
608 return Err(
609 crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(
610 name,
611 format!(
612 "Dictionary '{}' is not a word dictionary (type is {:?})",
613 name, config.dictionary_type
614 ),
615 ),
616 );
617 }
618
619 let alternating_names = config.alternating.as_ref().ok_or_else(|| {
621 crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(
622 name,
623 format!(
624 "Dictionary '{}' is not an alternating dictionary (missing 'alternating' field)",
625 name
626 ),
627 )
628 })?;
629
630 self.build_alternating_word_dictionary(config, alternating_names)
631 .map_err(|e| {
632 crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(name, e)
633 })
634 }
635
636 fn build_alternating_word_dictionary(
638 &self,
639 config: &DictionaryConfig,
640 alternating_names: &[String],
641 ) -> Result<crate::AlternatingWordDictionary, String> {
642 if alternating_names.is_empty() {
643 return Err("Alternating dictionary must have at least one sub-dictionary".to_string());
644 }
645
646 let mut dictionaries = Vec::with_capacity(alternating_names.len());
648 for dict_name in alternating_names {
649 let sub_dict = self
650 .word_dictionary(dict_name)
651 .map_err(|e| format!("Failed to load sub-dictionary '{}': {}", dict_name, e))?;
652 dictionaries.push(sub_dict);
653 }
654
655 let delimiter = config.delimiter.clone().unwrap_or_else(|| " ".to_string());
657 Ok(crate::AlternatingWordDictionary::new(
660 dictionaries,
661 delimiter,
662 ))
663 }
664
665 pub fn dictionary_type(&self, name: &str) -> Option<DictionaryType> {
669 self.get_dictionary(name).map(|c| c.dictionary_type.clone())
670 }
671
672 pub fn is_word_dictionary(&self, name: &str) -> bool {
674 self.dictionary_type(name) == Some(DictionaryType::Word)
675 }
676}
677
// Unit tests for the registry and for `DictionaryConfig` resolution helpers.
// Tests that call `load_default` depend on the contents of the build-generated registry.
#[cfg(test)]
mod tests {
    use super::*;

    // The built-in registry is expected to contain a "cards" dictionary.
    #[test]
    fn test_load_default_config() {
        let config = DictionaryRegistry::load_default().unwrap();
        assert!(config.dictionaries.contains_key("cards"));
    }

    // The built-in "cards" alphabet is expected to hold 52 characters.
    #[test]
    fn test_cards_dictionary_length() {
        let config = DictionaryRegistry::load_default().unwrap();
        let cards = config.get_dictionary("cards").unwrap();
        assert_eq!(cards.chars.chars().count(), 52);
    }

    // The built-in "base64" entry resolves to chunked mode and uses "=" padding.
    #[test]
    fn test_base64_chunked_mode() {
        let config = DictionaryRegistry::load_default().unwrap();
        let base64 = config.get_dictionary("base64").unwrap();
        assert_eq!(base64.effective_mode(), EncodingMode::Chunked);
        assert_eq!(base64.padding, Some("=".to_string()));
    }

    // The built-in "base64_radix" entry resolves to radix mode.
    #[test]
    fn test_base64_radix_mode() {
        let config = DictionaryRegistry::load_default().unwrap();
        let base64_radix = config.get_dictionary("base64_radix").unwrap();
        assert_eq!(base64_radix.effective_mode(), EncodingMode::Radix);
    }

    // Without an explicit mode, a power-of-two alphabet selects Chunked, anything else Radix.
    #[test]
    fn test_auto_detection_power_of_two() {
        // 4 characters: a power of two.
        let config = DictionaryConfig {
            chars: "ABCD".to_string(), ..Default::default()
        };
        assert_eq!(config.effective_mode(), EncodingMode::Chunked);

        // 3 characters: not a power of two.
        let config = DictionaryConfig {
            chars: "ABC".to_string(), ..Default::default()
        };
        assert_eq!(config.effective_mode(), EncodingMode::Radix);
    }

    // An explicit mode wins over auto-detection, even for a power-of-two alphabet.
    #[test]
    fn test_explicit_mode_override() {
        let config = DictionaryConfig {
            chars: "ABCD".to_string(), mode: Some(EncodingMode::Radix), ..Default::default()
        };
        assert_eq!(config.effective_mode(), EncodingMode::Radix);
    }

    // Merging adds new entries and replaces entries that share a name.
    #[test]
    fn test_merge_configs() {
        let mut config1 = DictionaryRegistry {
            dictionaries: HashMap::new(),
            compression: HashMap::new(),
            settings: Settings::default(),
        };
        config1.dictionaries.insert(
            "test1".to_string(),
            DictionaryConfig {
                chars: "ABC".to_string(),
                mode: Some(EncodingMode::Radix),
                ..Default::default()
            },
        );

        let mut config2 = DictionaryRegistry {
            dictionaries: HashMap::new(),
            compression: HashMap::new(),
            settings: Settings::default(),
        };
        config2.dictionaries.insert(
            "test2".to_string(),
            DictionaryConfig {
                chars: "XYZ".to_string(),
                mode: Some(EncodingMode::Radix),
                ..Default::default()
            },
        );
        // Same name as the entry in config1: expected to replace it on merge.
        config2.dictionaries.insert(
            "test1".to_string(),
            DictionaryConfig {
                chars: "DEF".to_string(),
                mode: Some(EncodingMode::Radix),
                ..Default::default()
            },
        );

        config1.merge(config2);

        assert_eq!(config1.dictionaries.len(), 2);
        assert_eq!(config1.get_dictionary("test1").unwrap().chars, "DEF");
        assert_eq!(config1.get_dictionary("test2").unwrap().chars, "XYZ");
    }

    // A registry can be parsed from TOML text; the legacy "base_conversion" mode alias is accepted.
    #[test]
    fn test_load_from_toml_string() {
        let toml_content = r#"
[dictionaries.custom]
chars = "0123456789"
mode = "base_conversion"
"#;
        let config = DictionaryRegistry::from_toml(toml_content).unwrap();
        assert!(config.dictionaries.contains_key("custom"));
        assert_eq!(config.get_dictionary("custom").unwrap().chars, "0123456789");
    }

    // Explicit `chars` are returned unchanged.
    #[test]
    fn test_effective_chars_from_explicit() {
        let config = DictionaryConfig {
            chars: "ABCD".to_string(),
            ..Default::default()
        };
        assert_eq!(config.effective_chars().unwrap(), "ABCD");
    }

    // `start` + `length` generate consecutive characters.
    #[test]
    fn test_effective_chars_from_range() {
        let config = DictionaryConfig {
            start: Some("A".to_string()),
            length: Some(4),
            ..Default::default()
        };
        assert_eq!(config.effective_chars().unwrap(), "ABCD");
    }

    // When both are given, explicit `chars` win over the generated range.
    #[test]
    fn test_effective_chars_explicit_takes_priority() {
        let config = DictionaryConfig {
            chars: "XYZ".to_string(),
            start: Some("A".to_string()),
            length: Some(4),
            ..Default::default()
        };
        assert_eq!(config.effective_chars().unwrap(), "XYZ");
    }

    // Range generation works for a non-ASCII start character.
    #[test]
    fn test_effective_chars_unicode_range() {
        let config = DictionaryConfig {
            start: Some("가".to_string()), length: Some(4),
            ..Default::default()
        };
        let result = config.effective_chars().unwrap();
        assert_eq!(result.chars().count(), 4);
        assert_eq!(result, "가각갂갃");
    }

    // U+D700 plus 512 code points reaches into the surrogate gap and must be rejected.
    #[test]
    fn test_effective_chars_surrogate_gap_error() {
        let config = DictionaryConfig {
            start: Some("\u{D700}".to_string()), length: Some(512), ..Default::default()
        };
        assert!(config.effective_chars().is_err());
    }

    // U+10FFFE plus 10 code points runs past U+10FFFF and must be rejected.
    #[test]
    fn test_effective_chars_exceeds_unicode_max() {
        let config = DictionaryConfig {
            start: Some("\u{10FFFE}".to_string()), length: Some(10), ..Default::default()
        };
        assert!(config.effective_chars().is_err());
    }

    // With a range definition, mode auto-detection is driven by the `length` field.
    #[test]
    fn test_effective_mode_with_length_field() {
        // 64 is a power of two.
        let config = DictionaryConfig {
            start: Some("A".to_string()),
            length: Some(64), ..Default::default()
        };
        assert_eq!(config.effective_mode(), EncodingMode::Chunked);

        // 52 is not a power of two.
        let config = DictionaryConfig {
            start: Some("A".to_string()),
            length: Some(52), ..Default::default()
        };
        assert_eq!(config.effective_mode(), EncodingMode::Radix);
    }
}