1use serde::Deserialize;
2use std::collections::HashMap;
3
4#[derive(Debug, Clone, Deserialize, PartialEq, Eq)]
9#[serde(rename_all = "snake_case")]
10#[derive(Default)]
11pub enum EncodingMode {
12 #[default]
16 #[serde(alias = "base_conversion")]
17 Radix,
18 Chunked,
21 ByteRange,
24}
25
26#[derive(Debug, Deserialize, Clone)]
28pub struct DictionaryConfig {
29 #[serde(default)]
31 pub chars: String,
32 #[serde(default)]
35 pub start: Option<String>,
36 #[serde(default)]
39 pub length: Option<usize>,
40 #[serde(default)]
42 pub mode: Option<EncodingMode>,
43 #[serde(default)]
45 pub padding: Option<String>,
46 #[serde(default)]
48 pub start_codepoint: Option<u32>,
49 #[serde(default = "default_true")]
52 pub common: bool,
53}
54
55impl DictionaryConfig {
56 pub fn effective_chars(&self) -> Result<String, String> {
63 if !self.chars.is_empty() {
65 return Ok(self.chars.clone());
66 }
67
68 if let (Some(start_str), Some(length)) = (&self.start, self.length) {
70 let start_char = start_str
71 .chars()
72 .next()
73 .ok_or("start must contain at least one character")?;
74 let start_codepoint = start_char as u32;
75
76 return Self::generate_range(start_codepoint, length);
77 }
78
79 Ok(String::new())
81 }
82
83 fn generate_range(start: u32, length: usize) -> Result<String, String> {
85 const MAX_UNICODE: u32 = 0x10FFFF;
86 const SURROGATE_START: u32 = 0xD800;
87 const SURROGATE_END: u32 = 0xDFFF;
88
89 if length == 0 {
90 return Err("length must be greater than 0".to_string());
91 }
92
93 let end = start
94 .checked_add(length as u32 - 1)
95 .ok_or("range exceeds maximum Unicode codepoint")?;
96
97 if end > MAX_UNICODE {
98 return Err(format!(
99 "range end U+{:X} exceeds maximum Unicode codepoint U+{:X}",
100 end, MAX_UNICODE
101 ));
102 }
103
104 let crosses_surrogates = start <= SURROGATE_END && end >= SURROGATE_START;
106 if crosses_surrogates {
107 return Err(format!(
108 "range U+{:X}..U+{:X} crosses surrogate gap (U+D800..U+DFFF)",
109 start, end
110 ));
111 }
112
113 let mut result = String::with_capacity(length * 4); for i in 0..length {
115 let codepoint = start + i as u32;
116 match char::from_u32(codepoint) {
117 Some(c) => result.push(c),
118 None => return Err(format!("invalid codepoint U+{:X}", codepoint)),
119 }
120 }
121
122 Ok(result)
123 }
124
125 pub fn effective_mode(&self) -> EncodingMode {
132 if let Some(mode) = &self.mode {
133 return mode.clone();
134 }
135
136 let len = if self.start_codepoint.is_some() {
138 return EncodingMode::ByteRange;
141 } else if let Some(length) = self.length {
142 length
144 } else {
145 self.chars.chars().count()
146 };
147
148 if len > 0 && len.is_power_of_two() {
149 EncodingMode::Chunked
150 } else {
151 EncodingMode::Radix
152 }
153 }
154}
155
/// Serde default provider for boolean fields that should be `true` when
/// omitted (used by `DictionaryConfig::common`).
fn default_true() -> bool {
    true
}
159
160#[derive(Debug, Deserialize)]
162pub struct DictionaryRegistry {
163 pub dictionaries: HashMap<String, DictionaryConfig>,
165 #[serde(default)]
167 pub compression: HashMap<String, CompressionConfig>,
168 #[serde(default)]
170 pub settings: Settings,
171}
172
173#[derive(Debug, Deserialize, Clone)]
175pub struct CompressionConfig {
176 pub default_level: u32,
178}
179
180#[derive(Debug, Deserialize, Clone, Default)]
182pub struct XxHashSettings {
183 #[serde(default)]
185 pub default_seed: u64,
186 #[serde(default)]
188 pub default_secret_file: Option<String>,
189}
190
191#[derive(Debug, Deserialize, Clone, Default)]
193pub struct Settings {
194 #[serde(default)]
196 pub default_dictionary: Option<String>,
197 #[serde(default)]
199 pub xxhash: XxHashSettings,
200}
201
202impl DictionaryRegistry {
203 pub fn from_toml(content: &str) -> Result<Self, toml::de::Error> {
205 toml::from_str(content)
206 }
207
208 pub fn load_default() -> Result<Self, Box<dyn std::error::Error>> {
212 let content = include_str!("../../dictionaries.toml");
213 Ok(Self::from_toml(content)?)
214 }
215
216 pub fn load_from_file(path: &std::path::Path) -> Result<Self, Box<dyn std::error::Error>> {
218 let content = std::fs::read_to_string(path)?;
219 Ok(Self::from_toml(&content)?)
220 }
221
222 pub fn load_with_overrides() -> Result<Self, Box<dyn std::error::Error>> {
231 let mut config = Self::load_default()?;
232
233 if let Some(config_dir) = dirs::config_dir() {
235 let user_config_path = config_dir.join("base-d").join("dictionaries.toml");
236 if user_config_path.exists() {
237 match Self::load_from_file(&user_config_path) {
238 Ok(user_config) => {
239 config.merge(user_config);
240 }
241 Err(e) => {
242 eprintln!(
243 "Warning: Failed to load user config from {:?}: {}",
244 user_config_path, e
245 );
246 }
247 }
248 }
249 }
250
251 let local_config_path = std::path::Path::new("dictionaries.toml");
253 if local_config_path.exists() {
254 match Self::load_from_file(local_config_path) {
255 Ok(local_config) => {
256 config.merge(local_config);
257 }
258 Err(e) => {
259 eprintln!(
260 "Warning: Failed to load local config from {:?}: {}",
261 local_config_path, e
262 );
263 }
264 }
265 }
266
267 Ok(config)
268 }
269
270 pub fn merge(&mut self, other: DictionaryRegistry) {
274 for (name, dictionary) in other.dictionaries {
275 self.dictionaries.insert(name, dictionary);
276 }
277 }
278
279 pub fn get_dictionary(&self, name: &str) -> Option<&DictionaryConfig> {
281 self.dictionaries.get(name)
282 }
283
284 pub fn dictionary(
302 &self,
303 name: &str,
304 ) -> Result<crate::Dictionary, crate::encoders::algorithms::errors::DictionaryNotFoundError>
305 {
306 let config = self.get_dictionary(name).ok_or_else(|| {
307 crate::encoders::algorithms::errors::DictionaryNotFoundError::new(name)
308 })?;
309
310 self.build_dictionary(config).map_err(|e| {
311 crate::encoders::algorithms::errors::DictionaryNotFoundError::with_cause(name, e)
312 })
313 }
314
315 pub fn random(&self) -> Result<(String, crate::Dictionary), Box<dyn std::error::Error>> {
331 use rand::seq::IteratorRandom;
332
333 let common_names: Vec<&String> = self
334 .dictionaries
335 .iter()
336 .filter(|(_, config)| config.common)
337 .map(|(name, _)| name)
338 .collect();
339
340 let name = common_names
341 .into_iter()
342 .choose(&mut rand::rng())
343 .ok_or("No common dictionaries available")?;
344
345 let dict = self.dictionary(name)?;
346 Ok((name.clone(), dict))
347 }
348
349 pub fn names(&self) -> Vec<&str> {
351 self.dictionaries.keys().map(|s| s.as_str()).collect()
352 }
353
354 pub fn common_names(&self) -> Vec<&str> {
356 self.dictionaries
357 .iter()
358 .filter(|(_, config)| config.common)
359 .map(|(name, _)| name.as_str())
360 .collect()
361 }
362
363 fn build_dictionary(&self, config: &DictionaryConfig) -> Result<crate::Dictionary, String> {
365 use crate::core::config::EncodingMode;
366
367 let mode = config.effective_mode();
368
369 if mode == EncodingMode::ByteRange {
371 let start = config
372 .start_codepoint
373 .ok_or("ByteRange mode requires start_codepoint")?;
374 return crate::Dictionary::builder()
375 .mode(mode)
376 .start_codepoint(start)
377 .build();
378 }
379
380 let chars_str = config.effective_chars()?;
382 let chars: Vec<char> = chars_str.chars().collect();
383
384 let mut builder = crate::Dictionary::builder().chars(chars).mode(mode);
386
387 if let Some(pad_str) = &config.padding
388 && let Some(pad_char) = pad_str.chars().next()
389 {
390 builder = builder.padding(pad_char);
391 }
392
393 builder.build()
394 }
395}
396
#[cfg(test)]
mod tests {
    use super::*;

    /// Config defined purely by an explicit character list.
    fn explicit(chars: &str, mode: Option<EncodingMode>) -> DictionaryConfig {
        DictionaryConfig {
            chars: chars.to_string(),
            mode,
            padding: None,
            start_codepoint: None,
            start: None,
            length: None,
            common: true,
        }
    }

    /// Config carrying a `start`/`length` range, optionally alongside
    /// explicit characters.
    fn ranged(chars: &str, start: &str, length: usize) -> DictionaryConfig {
        DictionaryConfig {
            chars: chars.to_string(),
            mode: None,
            padding: None,
            start_codepoint: None,
            start: Some(start.to_string()),
            length: Some(length),
            common: true,
        }
    }

    /// Registry holding radix dictionaries built from `(name, chars)` pairs.
    fn registry_of(entries: &[(&str, &str)]) -> DictionaryRegistry {
        let mut registry = DictionaryRegistry {
            dictionaries: HashMap::new(),
            compression: HashMap::new(),
            settings: Settings::default(),
        };
        for (name, chars) in entries {
            registry
                .dictionaries
                .insert(name.to_string(), explicit(chars, Some(EncodingMode::Radix)));
        }
        registry
    }

    #[test]
    fn test_load_default_config() {
        let registry = DictionaryRegistry::load_default().unwrap();
        assert!(registry.dictionaries.contains_key("cards"));
    }

    #[test]
    fn test_cards_dictionary_length() {
        let registry = DictionaryRegistry::load_default().unwrap();
        let cards = registry.get_dictionary("cards").unwrap();
        assert_eq!(cards.chars.chars().count(), 52);
    }

    #[test]
    fn test_base64_chunked_mode() {
        let registry = DictionaryRegistry::load_default().unwrap();
        let base64 = registry.get_dictionary("base64").unwrap();
        assert_eq!(base64.effective_mode(), EncodingMode::Chunked);
        assert_eq!(base64.padding.as_deref(), Some("="));
    }

    #[test]
    fn test_base64_radix_mode() {
        let registry = DictionaryRegistry::load_default().unwrap();
        let base64_radix = registry.get_dictionary("base64_radix").unwrap();
        assert_eq!(base64_radix.effective_mode(), EncodingMode::Radix);
    }

    #[test]
    fn test_auto_detection_power_of_two() {
        // Four characters: power of two.
        assert_eq!(
            explicit("ABCD", None).effective_mode(),
            EncodingMode::Chunked
        );
        // Three characters: not a power of two.
        assert_eq!(explicit("ABC", None).effective_mode(), EncodingMode::Radix);
    }

    #[test]
    fn test_explicit_mode_override() {
        let config = explicit("ABCD", Some(EncodingMode::Radix));
        assert_eq!(config.effective_mode(), EncodingMode::Radix);
    }

    #[test]
    fn test_merge_configs() {
        let mut base = registry_of(&[("test1", "ABC")]);
        let overrides = registry_of(&[("test2", "XYZ"), ("test1", "DEF")]);

        base.merge(overrides);

        assert_eq!(base.dictionaries.len(), 2);
        assert_eq!(base.get_dictionary("test1").unwrap().chars, "DEF");
        assert_eq!(base.get_dictionary("test2").unwrap().chars, "XYZ");
    }

    #[test]
    fn test_load_from_toml_string() {
        let toml_content = r#"
[dictionaries.custom]
chars = "0123456789"
mode = "base_conversion"
"#;
        let registry = DictionaryRegistry::from_toml(toml_content).unwrap();
        assert!(registry.dictionaries.contains_key("custom"));
        assert_eq!(
            registry.get_dictionary("custom").unwrap().chars,
            "0123456789"
        );
    }

    #[test]
    fn test_effective_chars_from_explicit() {
        assert_eq!(explicit("ABCD", None).effective_chars().unwrap(), "ABCD");
    }

    #[test]
    fn test_effective_chars_from_range() {
        assert_eq!(ranged("", "A", 4).effective_chars().unwrap(), "ABCD");
    }

    #[test]
    fn test_effective_chars_explicit_takes_priority() {
        assert_eq!(ranged("XYZ", "A", 4).effective_chars().unwrap(), "XYZ");
    }

    #[test]
    fn test_effective_chars_unicode_range() {
        let generated = ranged("", "가", 4).effective_chars().unwrap();
        assert_eq!(generated.chars().count(), 4);
        assert_eq!(generated, "가각갂갃");
    }

    #[test]
    fn test_effective_chars_surrogate_gap_error() {
        // U+D700 + 512 codepoints reaches into U+D800..U+DFFF.
        assert!(ranged("", "\u{D700}", 512).effective_chars().is_err());
    }

    #[test]
    fn test_effective_chars_exceeds_unicode_max() {
        // U+10FFFE + 10 codepoints runs past U+10FFFF.
        assert!(ranged("", "\u{10FFFE}", 10).effective_chars().is_err());
    }

    #[test]
    fn test_effective_mode_with_length_field() {
        assert_eq!(
            ranged("", "A", 64).effective_mode(),
            EncodingMode::Chunked
        );
        assert_eq!(ranged("", "A", 52).effective_mode(), EncodingMode::Radix);
    }
}