1use serde::Deserialize;
2use std::collections::HashMap;
3
4#[derive(Debug, Clone, Deserialize, PartialEq, Eq)]
9#[serde(rename_all = "snake_case")]
10#[derive(Default)]
11pub enum EncodingMode {
12 #[default]
16 #[serde(alias = "base_conversion")]
17 Radix,
18 Chunked,
21 ByteRange,
24}
25
26#[derive(Debug, Deserialize, Clone)]
28pub struct DictionaryConfig {
29 #[serde(default)]
31 pub chars: String,
32 #[serde(default)]
35 pub start: Option<String>,
36 #[serde(default)]
39 pub length: Option<usize>,
40 #[serde(default)]
42 pub mode: Option<EncodingMode>,
43 #[serde(default)]
45 pub padding: Option<String>,
46 #[serde(default)]
48 pub start_codepoint: Option<u32>,
49 #[serde(default = "default_true")]
52 pub common: bool,
53}
54
55impl DictionaryConfig {
56 pub fn effective_chars(&self) -> Result<String, String> {
63 if !self.chars.is_empty() {
65 return Ok(self.chars.clone());
66 }
67
68 if let (Some(start_str), Some(length)) = (&self.start, self.length) {
70 let start_char = start_str
71 .chars()
72 .next()
73 .ok_or("start must contain at least one character")?;
74 let start_codepoint = start_char as u32;
75
76 return Self::generate_range(start_codepoint, length);
77 }
78
79 Ok(String::new())
81 }
82
83 fn generate_range(start: u32, length: usize) -> Result<String, String> {
85 const MAX_UNICODE: u32 = 0x10FFFF;
86 const SURROGATE_START: u32 = 0xD800;
87 const SURROGATE_END: u32 = 0xDFFF;
88
89 if length == 0 {
90 return Err("length must be greater than 0".to_string());
91 }
92
93 let end = start
94 .checked_add(length as u32 - 1)
95 .ok_or("range exceeds maximum Unicode codepoint")?;
96
97 if end > MAX_UNICODE {
98 return Err(format!(
99 "range end U+{:X} exceeds maximum Unicode codepoint U+{:X}",
100 end, MAX_UNICODE
101 ));
102 }
103
104 let crosses_surrogates = start <= SURROGATE_END && end >= SURROGATE_START;
106 if crosses_surrogates {
107 return Err(format!(
108 "range U+{:X}..U+{:X} crosses surrogate gap (U+D800..U+DFFF)",
109 start, end
110 ));
111 }
112
113 let mut result = String::with_capacity(length * 4); for i in 0..length {
115 let codepoint = start + i as u32;
116 match char::from_u32(codepoint) {
117 Some(c) => result.push(c),
118 None => return Err(format!("invalid codepoint U+{:X}", codepoint)),
119 }
120 }
121
122 Ok(result)
123 }
124
125 pub fn effective_mode(&self) -> EncodingMode {
132 if let Some(mode) = &self.mode {
133 return mode.clone();
134 }
135
136 let len = if self.start_codepoint.is_some() {
138 return EncodingMode::ByteRange;
141 } else if let Some(length) = self.length {
142 length
144 } else {
145 self.chars.chars().count()
146 };
147
148 if len > 0 && len.is_power_of_two() {
149 EncodingMode::Chunked
150 } else {
151 EncodingMode::Radix
152 }
153 }
154}
155
/// Serde default provider for boolean fields that should be `true` when
/// omitted (referenced by `DictionaryConfig::common`).
fn default_true() -> bool {
    true
}
159
/// Top-level configuration document: named dictionaries plus optional
/// compression and general settings.
#[derive(Debug, Deserialize)]
pub struct DictionaryRegistry {
    /// Dictionary definitions keyed by name. This field has no serde default,
    /// so it must be present in the input.
    pub dictionaries: HashMap<String, DictionaryConfig>,
    /// Compression settings keyed by a string (presumably the algorithm name —
    /// confirm against consumers). Empty when omitted.
    #[serde(default)]
    pub compression: HashMap<String, CompressionConfig>,
    /// General settings; `Settings::default()` is used when omitted.
    #[serde(default)]
    pub settings: Settings,
}
172
/// Settings for one entry of the registry's `compression` table.
#[derive(Debug, Deserialize, Clone)]
pub struct CompressionConfig {
    /// Level to use by default. Required: the field has no serde default.
    /// NOTE(review): the valid range is not visible in this file.
    pub default_level: u32,
}
179
180#[derive(Debug, Deserialize, Clone, Default)]
182pub struct XxHashSettings {
183 #[serde(default)]
185 pub default_seed: u64,
186 #[serde(default)]
188 pub default_secret_file: Option<String>,
189}
190
191#[derive(Debug, Deserialize, Clone, Default)]
193pub struct Settings {
194 #[serde(default)]
196 pub default_dictionary: Option<String>,
197 #[serde(default)]
199 pub xxhash: XxHashSettings,
200}
201
202impl DictionaryRegistry {
203 pub fn from_toml(content: &str) -> Result<Self, toml::de::Error> {
205 toml::from_str(content)
206 }
207
208 pub fn load_default() -> Result<Self, Box<dyn std::error::Error>> {
212 let content = include_str!("../../dictionaries.toml");
213 Ok(Self::from_toml(content)?)
214 }
215
216 pub fn load_from_file(path: &std::path::Path) -> Result<Self, Box<dyn std::error::Error>> {
218 let content = std::fs::read_to_string(path)?;
219 Ok(Self::from_toml(&content)?)
220 }
221
222 pub fn load_with_overrides() -> Result<Self, Box<dyn std::error::Error>> {
231 let mut config = Self::load_default()?;
232
233 if let Some(config_dir) = dirs::config_dir() {
235 let user_config_path = config_dir.join("base-d").join("dictionaries.toml");
236 if user_config_path.exists() {
237 match Self::load_from_file(&user_config_path) {
238 Ok(user_config) => {
239 config.merge(user_config);
240 }
241 Err(e) => {
242 eprintln!(
243 "Warning: Failed to load user config from {:?}: {}",
244 user_config_path, e
245 );
246 }
247 }
248 }
249 }
250
251 let local_config_path = std::path::Path::new("dictionaries.toml");
253 if local_config_path.exists() {
254 match Self::load_from_file(local_config_path) {
255 Ok(local_config) => {
256 config.merge(local_config);
257 }
258 Err(e) => {
259 eprintln!(
260 "Warning: Failed to load local config from {:?}: {}",
261 local_config_path, e
262 );
263 }
264 }
265 }
266
267 Ok(config)
268 }
269
270 pub fn merge(&mut self, other: DictionaryRegistry) {
274 for (name, dictionary) in other.dictionaries {
275 self.dictionaries.insert(name, dictionary);
276 }
277 }
278
279 pub fn get_dictionary(&self, name: &str) -> Option<&DictionaryConfig> {
281 self.dictionaries.get(name)
282 }
283}
284
#[cfg(test)]
mod tests {
    use super::*;

    /// Config with only `chars` populated and every optional field unset.
    fn chars_only(chars: &str) -> DictionaryConfig {
        DictionaryConfig {
            chars: chars.to_string(),
            start: None,
            length: None,
            mode: None,
            padding: None,
            start_codepoint: None,
            common: true,
        }
    }

    /// Config defined purely by a `start` + `length` range.
    fn range_only(start: &str, length: usize) -> DictionaryConfig {
        DictionaryConfig {
            start: Some(start.to_string()),
            length: Some(length),
            ..chars_only("")
        }
    }

    /// Config with explicit characters and an explicit Radix mode.
    fn radix_dict(chars: &str) -> DictionaryConfig {
        DictionaryConfig {
            mode: Some(EncodingMode::Radix),
            ..chars_only(chars)
        }
    }

    /// Registry with no dictionaries, no compression entries, default settings.
    fn empty_registry() -> DictionaryRegistry {
        DictionaryRegistry {
            dictionaries: HashMap::new(),
            compression: HashMap::new(),
            settings: Settings::default(),
        }
    }

    #[test]
    fn test_load_default_config() {
        let registry = DictionaryRegistry::load_default().unwrap();
        assert!(registry.dictionaries.contains_key("cards"));
    }

    #[test]
    fn test_cards_dictionary_length() {
        let registry = DictionaryRegistry::load_default().unwrap();
        let cards = registry.get_dictionary("cards").unwrap();
        assert_eq!(cards.chars.chars().count(), 52);
    }

    #[test]
    fn test_base64_chunked_mode() {
        let registry = DictionaryRegistry::load_default().unwrap();
        let base64 = registry.get_dictionary("base64").unwrap();
        assert_eq!(base64.effective_mode(), EncodingMode::Chunked);
        assert_eq!(base64.padding, Some("=".to_string()));
    }

    #[test]
    fn test_base64_radix_mode() {
        let registry = DictionaryRegistry::load_default().unwrap();
        let base64_radix = registry.get_dictionary("base64_radix").unwrap();
        assert_eq!(base64_radix.effective_mode(), EncodingMode::Radix);
    }

    #[test]
    fn test_auto_detection_power_of_two() {
        // Four characters: a power of two.
        assert_eq!(chars_only("ABCD").effective_mode(), EncodingMode::Chunked);

        // Three characters: not a power of two.
        assert_eq!(chars_only("ABC").effective_mode(), EncodingMode::Radix);
    }

    #[test]
    fn test_explicit_mode_override() {
        // Size is a power of two, but the explicit mode must win.
        assert_eq!(radix_dict("ABCD").effective_mode(), EncodingMode::Radix);
    }

    #[test]
    fn test_merge_configs() {
        let mut base = empty_registry();
        base.dictionaries
            .insert("test1".to_string(), radix_dict("ABC"));

        let mut overrides = empty_registry();
        overrides
            .dictionaries
            .insert("test2".to_string(), radix_dict("XYZ"));
        overrides
            .dictionaries
            .insert("test1".to_string(), radix_dict("DEF"));

        base.merge(overrides);

        assert_eq!(base.dictionaries.len(), 2);
        assert_eq!(base.get_dictionary("test1").unwrap().chars, "DEF");
        assert_eq!(base.get_dictionary("test2").unwrap().chars, "XYZ");
    }

    #[test]
    fn test_load_from_toml_string() {
        let toml_content = r#"
[dictionaries.custom]
chars = "0123456789"
mode = "base_conversion"
"#;
        let registry = DictionaryRegistry::from_toml(toml_content).unwrap();
        assert!(registry.dictionaries.contains_key("custom"));
        assert_eq!(
            registry.get_dictionary("custom").unwrap().chars,
            "0123456789"
        );
    }

    #[test]
    fn test_effective_chars_from_explicit() {
        assert_eq!(chars_only("ABCD").effective_chars().unwrap(), "ABCD");
    }

    #[test]
    fn test_effective_chars_from_range() {
        assert_eq!(range_only("A", 4).effective_chars().unwrap(), "ABCD");
    }

    #[test]
    fn test_effective_chars_explicit_takes_priority() {
        // Both explicit chars and a range are present; the chars must be used.
        let config = DictionaryConfig {
            chars: "XYZ".to_string(),
            ..range_only("A", 4)
        };
        assert_eq!(config.effective_chars().unwrap(), "XYZ");
    }

    #[test]
    fn test_effective_chars_unicode_range() {
        let generated = range_only("가", 4).effective_chars().unwrap();
        assert_eq!(generated.chars().count(), 4);
        assert_eq!(generated, "가각갂갃");
    }

    #[test]
    fn test_effective_chars_surrogate_gap_error() {
        // U+D700 + 511 reaches into the surrogate gap.
        assert!(range_only("\u{D700}", 512).effective_chars().is_err());
    }

    #[test]
    fn test_effective_chars_exceeds_unicode_max() {
        // U+10FFFE + 9 lies beyond U+10FFFF.
        assert!(range_only("\u{10FFFE}", 10).effective_chars().is_err());
    }

    #[test]
    fn test_effective_mode_with_length_field() {
        // 64 is a power of two.
        assert_eq!(range_only("A", 64).effective_mode(), EncodingMode::Chunked);

        // 52 is not.
        assert_eq!(range_only("A", 52).effective_mode(), EncodingMode::Radix);
    }
}