1use std::collections::HashSet;
2use std::path::Path;
3
4use serde::Deserialize;
5
6use crate::error::ConfigError;
7
/// Reporting level attached to a built-in check or to a custom rule.
///
/// The textual forms used in configuration and in `Display` output are
/// `error`, `warn` and `off`. How each level is acted upon is decided by the
/// code that consumes the configuration, not by this module.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum Severity {
    /// Written `error`. This is the `Default` value.
    #[default]
    Error,
    /// Written `warn`.
    Warn,
    /// Written `off`.
    Off,
}
19
20impl std::fmt::Display for Severity {
21 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
22 match self {
23 Self::Error => f.write_str("error"),
24 Self::Warn => f.write_str("warn"),
25 Self::Off => f.write_str("off"),
26 }
27 }
28}
29
30impl<'de> Deserialize<'de> for Severity {
31 fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
32 let s = String::deserialize(deserializer)?;
33 match s.as_str() {
34 "error" => Ok(Self::Error),
35 "warn" => Ok(Self::Warn),
36 "off" => Ok(Self::Off),
37 other => Err(serde::de::Error::unknown_variant(
38 other,
39 &["error", "warn", "off"],
40 )),
41 }
42 }
43}
44
45#[derive(Debug, Clone)]
47pub struct WikiConfig {
48 pub index: Option<String>,
50 pub directories: Vec<DirectoryConfig>,
52 pub linking: LinkingConfig,
54 pub checks: ChecksConfig,
56 pub rules: Vec<RuleConfig>,
58}
59
/// One `[[directories]]` entry after normalisation.
#[derive(Debug, Clone)]
pub struct DirectoryConfig {
    /// Directory path as written in the config, without trailing `/`.
    /// The special value `.` is treated as matching every path.
    pub path: String,
    /// Value reported by `WikiConfig::is_autolink_dir` for files resolved to
    /// this directory. Defaults to `true` when omitted in the config.
    pub autolink: bool,
}
68
/// Resolved contents of the `[linking]` table.
#[derive(Debug, Clone)]
pub struct LinkingConfig {
    /// De-duplicated entries of the `exclude` list.
    // NOTE(review): presumably terms that must not be auto-linked — confirm
    // against the linker that consumes this set.
    pub exclude: HashSet<String>,
    /// Value of `autolink_field`; `autolink` unless overridden.
    // NOTE(review): looks like a frontmatter key name — confirm with caller.
    pub autolink_field: String,
}
77
78#[derive(Debug, Clone)]
80pub struct ChecksConfig {
81 pub broken_links: Severity,
82 pub orphan_pages: Severity,
83 pub index_coverage: Severity,
84}
85
86#[derive(Debug, Clone)]
88pub enum RuleConfig {
89 RequiredSections {
90 dirs: Vec<String>,
91 sections: Vec<String>,
92 severity: Severity,
93 },
94 RequiredFrontmatter {
95 dirs: Vec<String>,
96 fields: Vec<String>,
97 severity: Severity,
98 },
99 MirrorParity {
100 left: String,
101 right: String,
102 severity: Severity,
103 },
104 CitationPattern {
105 name: String,
106 dirs: Vec<String>,
107 pattern: String,
108 match_in: String,
109 match_mode: MatchMode,
110 severity: Severity,
111 },
112}
113
114impl RuleConfig {
115 pub fn severity(&self) -> Severity {
116 match self {
117 Self::RequiredSections { severity, .. }
118 | Self::RequiredFrontmatter { severity, .. }
119 | Self::MirrorParity { severity, .. }
120 | Self::CitationPattern { severity, .. } => *severity,
121 }
122 }
123}
124
/// The `match_mode` option of a `citation-pattern` rule.
///
/// Parsed from the strings `content` and `filename`.
// NOTE(review): the exact matching semantics live in the rule implementation,
// which is not part of this module — confirm there before relying on them.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum MatchMode {
    /// Written `content`. This is the `Default` value.
    #[default]
    Content,
    /// Written `filename`.
    Filename,
}
134
135impl<'de> Deserialize<'de> for MatchMode {
136 fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
137 let s = String::deserialize(deserializer)?;
138 match s.as_str() {
139 "content" => Ok(Self::Content),
140 "filename" => Ok(Self::Filename),
141 other => Err(serde::de::Error::unknown_variant(
142 other,
143 &["content", "filename"],
144 )),
145 }
146 }
147}
148
149#[derive(Deserialize)]
152struct RawConfig {
153 index: Option<String>,
154 #[serde(default)]
155 directories: Vec<RawDirectoryConfig>,
156 #[serde(default)]
157 linking: RawLinkingConfig,
158 #[serde(default)]
159 checks: RawChecksConfig,
160 #[serde(default)]
161 rules: Vec<RawRuleConfig>,
162}
163
164#[derive(Deserialize)]
165struct RawDirectoryConfig {
166 path: String,
167 #[serde(default = "default_true")]
168 autolink: bool,
169}
170
/// Serde default provider for boolean options that are enabled unless the
/// config explicitly disables them (serde needs a function path for this).
fn default_true() -> bool {
    true
}
174
175#[derive(Deserialize, Default)]
176struct RawLinkingConfig {
177 #[serde(default)]
178 exclude: Vec<String>,
179 #[serde(default = "default_autolink_field")]
180 autolink_field: String,
181}
182
/// Default value of `linking.autolink_field`: the string `autolink`.
fn default_autolink_field() -> String {
    String::from("autolink")
}
186
187#[derive(Deserialize, Default)]
188struct RawChecksConfig {
189 #[serde(default)]
190 broken_links: Option<Severity>,
191 #[serde(default)]
192 orphan_pages: Option<Severity>,
193 #[serde(default)]
194 index_coverage: Option<Severity>,
195}
196
197#[derive(Deserialize)]
198#[serde(tag = "check")]
199enum RawRuleConfig {
200 #[serde(rename = "required-sections")]
201 RequiredSections {
202 dirs: Vec<String>,
203 sections: Vec<String>,
204 #[serde(default)]
205 severity: Option<Severity>,
206 },
207 #[serde(rename = "required-frontmatter")]
208 RequiredFrontmatter {
209 dirs: Vec<String>,
210 fields: Vec<String>,
211 #[serde(default)]
212 severity: Option<Severity>,
213 },
214 #[serde(rename = "mirror-parity")]
215 MirrorParity {
216 left: String,
217 right: String,
218 #[serde(default)]
219 severity: Option<Severity>,
220 },
221 #[serde(rename = "citation-pattern")]
222 CitationPattern {
223 name: String,
224 dirs: Vec<String>,
225 #[serde(default)]
226 pattern: Option<String>,
227 #[serde(default)]
228 preset: Option<String>,
229 match_in: String,
230 #[serde(default)]
231 match_mode: Option<MatchMode>,
232 #[serde(default)]
233 severity: Option<Severity>,
234 },
235}
236
237fn resolve_preset(name: &str) -> Result<(String, MatchMode), ConfigError> {
239 match name {
240 "bold-method-year" => Ok((
241 r"\*\*(?P<id>[A-Za-z][A-Za-z0-9-]+)\*\*\s*\([^)]*\d{4}[^)]*\)".to_owned(),
242 MatchMode::Filename,
243 )),
244 other => Err(ConfigError::UnknownPreset(other.to_owned())),
245 }
246}
247
248impl WikiConfig {
249 pub fn load(root: &Path) -> Result<Option<Self>, ConfigError> {
252 let config_path = root.join("wiki.toml");
253 let content = match std::fs::read_to_string(&config_path) {
254 Ok(content) => content,
255 Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(None),
256 Err(e) => {
257 return Err(ConfigError::Read {
258 path: config_path,
259 source: e,
260 });
261 }
262 };
263 let raw: RawConfig = toml::from_str(&content).map_err(|e| ConfigError::Parse {
264 path: config_path,
265 source: e,
266 })?;
267 Self::from_raw(raw).map(Some)
268 }
269
270 pub fn auto_detect(root: &Path) -> Self {
272 let has_wiki_dir = root.join("wiki").is_dir();
273 let dir_path = if has_wiki_dir { "wiki" } else { "." };
274
275 Self {
276 index: Some("index.md".to_owned()),
277 directories: vec![DirectoryConfig {
278 path: dir_path.to_owned(),
279 autolink: true,
280 }],
281 linking: LinkingConfig {
282 exclude: HashSet::new(),
283 autolink_field: default_autolink_field(),
284 },
285 checks: ChecksConfig {
286 broken_links: Severity::Error,
287 orphan_pages: Severity::Error,
288 index_coverage: Severity::Error,
289 },
290 rules: Vec::new(),
291 }
292 }
293
294 pub fn load_or_detect(root: &Path) -> Result<Self, ConfigError> {
296 match Self::load(root)? {
297 Some(config) => Ok(config),
298 None => Ok(Self::auto_detect(root)),
299 }
300 }
301
302 fn from_raw(raw: RawConfig) -> Result<Self, ConfigError> {
303 let mut directories: Vec<DirectoryConfig> = if raw.directories.is_empty() {
304 vec![DirectoryConfig {
306 path: "wiki".to_owned(),
307 autolink: true,
308 }]
309 } else {
310 raw.directories
311 .into_iter()
312 .map(|d| DirectoryConfig {
313 path: normalize_path(&d.path),
314 autolink: d.autolink,
315 })
316 .collect()
317 };
318
319 directories.sort_by(|a, b| b.path.len().cmp(&a.path.len()));
321
322 let linking = LinkingConfig {
323 exclude: raw.linking.exclude.into_iter().collect(),
324 autolink_field: raw.linking.autolink_field,
325 };
326
327 let checks = ChecksConfig {
328 broken_links: raw.checks.broken_links.unwrap_or(Severity::Error),
329 orphan_pages: raw.checks.orphan_pages.unwrap_or(Severity::Error),
330 index_coverage: raw.checks.index_coverage.unwrap_or(Severity::Error),
331 };
332
333 let mut rules = Vec::new();
334 for raw_rule in raw.rules {
335 rules.push(convert_rule(raw_rule)?);
336 }
337
338 for rule in &rules {
340 if let RuleConfig::CitationPattern { pattern, name, .. } = rule {
341 regex_lite::Regex::new(pattern).map_err(|e| ConfigError::InvalidPattern {
342 name: name.clone(),
343 source: e,
344 })?;
345 }
346 }
347
348 Ok(Self {
349 index: match raw.index {
350 Some(s) if s.is_empty() => None,
351 Some(s) => Some(s),
352 None => Some("index.md".to_owned()),
353 },
354 directories,
355 linking,
356 checks,
357 rules,
358 })
359 }
360
361 pub fn directory_for(&self, rel_path: &Path) -> Option<&DirectoryConfig> {
364 let rel_str = rel_path.to_str()?;
365 self.directories
367 .iter()
368 .find(|d| rel_str.starts_with(&d.path) || d.path == ".")
369 }
370
371 pub fn is_autolink_dir(&self, rel_path: &Path) -> bool {
373 self.directory_for(rel_path)
374 .map(|d| d.autolink)
375 .unwrap_or(false)
376 }
377
378 pub fn matches_dirs(rel_path: &Path, dirs: &[String]) -> bool {
380 let Some(rel_str) = rel_path.to_str() else {
381 return false;
382 };
383 dirs.iter().any(|d| rel_str.starts_with(d.as_str()))
384 }
385
386 pub fn mirror_paths(&self) -> Vec<(&str, &str)> {
388 self.rules
389 .iter()
390 .filter_map(|r| match r {
391 RuleConfig::MirrorParity { left, right, .. } => {
392 Some((left.as_str(), right.as_str()))
393 }
394 _ => None,
395 })
396 .collect()
397 }
398}
399
400fn convert_rule(raw: RawRuleConfig) -> Result<RuleConfig, ConfigError> {
401 match raw {
402 RawRuleConfig::RequiredSections {
403 dirs,
404 sections,
405 severity,
406 } => Ok(RuleConfig::RequiredSections {
407 dirs: dirs.into_iter().map(|d| normalize_path(&d)).collect(),
408 sections,
409 severity: severity.unwrap_or(Severity::Error),
410 }),
411 RawRuleConfig::RequiredFrontmatter {
412 dirs,
413 fields,
414 severity,
415 } => Ok(RuleConfig::RequiredFrontmatter {
416 dirs: dirs.into_iter().map(|d| normalize_path(&d)).collect(),
417 fields,
418 severity: severity.unwrap_or(Severity::Error),
419 }),
420 RawRuleConfig::MirrorParity {
421 left,
422 right,
423 severity,
424 } => Ok(RuleConfig::MirrorParity {
425 left: normalize_path(&left),
426 right: normalize_path(&right),
427 severity: severity.unwrap_or(Severity::Error),
428 }),
429 RawRuleConfig::CitationPattern {
430 name,
431 dirs,
432 pattern,
433 preset,
434 match_in,
435 match_mode,
436 severity,
437 } => {
438 let (resolved_pattern, resolved_mode) = match (pattern, preset) {
439 (Some(p), None) => (p, match_mode.unwrap_or(MatchMode::Content)),
440 (None, Some(preset_name)) => {
441 let (p, m) = resolve_preset(&preset_name)?;
442 (p, match_mode.unwrap_or(m))
443 }
444 (Some(_), Some(_)) => {
445 return Err(ConfigError::Validation(format!(
446 "citation-pattern '{name}': cannot specify both 'pattern' and 'preset'"
447 )));
448 }
449 (None, None) => {
450 return Err(ConfigError::Validation(format!(
451 "citation-pattern '{name}': must specify either 'pattern' or 'preset'"
452 )));
453 }
454 };
455 Ok(RuleConfig::CitationPattern {
456 name,
457 dirs: dirs.into_iter().map(|d| normalize_path(&d)).collect(),
458 pattern: resolved_pattern,
459 match_in: normalize_path(&match_in),
460 match_mode: resolved_mode,
461 severity: severity.unwrap_or(Severity::Warn),
462 })
463 }
464 }
465}
466
/// Returns `path` with all trailing `/` characters removed.
///
/// Nothing else is changed, so a path made only of slashes becomes empty.
fn normalize_path(path: &str) -> String {
    let without_trailing_slashes = path.trim_end_matches('/');
    String::from(without_trailing_slashes)
}
471
#[cfg(test)]
mod tests {
    use super::*;

    /// Deserializes `source` as TOML (panicking if that fails) and runs it
    /// through `WikiConfig::from_raw`, returning the validation result.
    fn resolve(source: &str) -> Result<WikiConfig, ConfigError> {
        let raw: RawConfig = toml::from_str(source).unwrap();
        WikiConfig::from_raw(raw)
    }

    /// A single directory entry is enough; everything else is defaulted.
    #[test]
    fn parses_minimal_config() {
        let config = resolve(
            r#"
[[directories]]
path = "wiki"
"#,
        )
        .unwrap();

        assert_eq!(config.directories.len(), 1);
        let only = &config.directories[0];
        assert_eq!(only.path, "wiki");
        assert!(only.autolink);
        assert_eq!(config.checks.broken_links, Severity::Error);
    }

    /// Exercises every table and every rule kind except required-frontmatter.
    #[test]
    fn parses_full_config() {
        let config = resolve(
            r#"
index = "contents.md"

[[directories]]
path = "wiki"

[[directories]]
path = "wiki/papers"
autolink = false

[linking]
exclude = ["the", "a"]
autolink_field = "auto"

[checks]
broken_links = "error"
orphan_pages = "warn"
index_coverage = "off"

[[rules]]
check = "required-sections"
dirs = ["wiki/concepts"]
sections = ["See also"]
severity = "error"

[[rules]]
check = "mirror-parity"
left = "wiki/papers"
right = "raw/papers"
severity = "warn"

[[rules]]
check = "citation-pattern"
name = "arxiv"
dirs = ["wiki"]
pattern = 'arxiv\.org/abs/(?P<id>\d{4}\.\d{4,5})'
match_in = "wiki/papers"
severity = "warn"

[[rules]]
check = "citation-pattern"
name = "bold-method"
preset = "bold-method-year"
dirs = ["wiki"]
match_in = "wiki/papers"
severity = "warn"
"#,
        )
        .unwrap();

        assert_eq!(config.index.as_deref(), Some("contents.md"));
        assert!(config.linking.exclude.contains("the"));
        assert_eq!(config.linking.autolink_field, "auto");
        assert_eq!(config.checks.orphan_pages, Severity::Warn);
        assert_eq!(config.checks.index_coverage, Severity::Off);
        assert_eq!(config.rules.len(), 4);

        // Directories come back ordered longest path first.
        let (first, second) = (&config.directories[0], &config.directories[1]);
        assert_eq!(first.path, "wiki/papers");
        assert!(!first.autolink);
        assert_eq!(second.path, "wiki");
        assert!(second.autolink);
    }

    /// With the nested directory listed first, its `autolink` setting
    /// overrides the parent's for files underneath it.
    #[test]
    fn directory_resolution_most_specific_wins() {
        let papers = DirectoryConfig {
            path: "wiki/papers".to_owned(),
            autolink: false,
        };
        let wiki = DirectoryConfig {
            path: "wiki".to_owned(),
            autolink: true,
        };
        let config = WikiConfig {
            index: None,
            directories: vec![papers, wiki],
            linking: LinkingConfig {
                exclude: HashSet::new(),
                autolink_field: "autolink".to_owned(),
            },
            checks: ChecksConfig {
                broken_links: Severity::Error,
                orphan_pages: Severity::Error,
                index_coverage: Severity::Error,
            },
            rules: Vec::new(),
        };

        assert!(config.is_autolink_dir(Path::new("wiki/concepts/GRPO.md")));
        assert!(!config.is_autolink_dir(Path::new("wiki/papers/deepseek.md")));
    }

    /// A `wiki/` sub-directory is picked up when present.
    #[test]
    fn auto_detect_with_wiki_dir() {
        let root = tempfile::tempdir().unwrap();
        std::fs::create_dir(root.path().join("wiki")).unwrap();
        std::fs::write(root.path().join("index.md"), "# Index").unwrap();

        let config = WikiConfig::auto_detect(root.path());
        assert_eq!(config.directories[0].path, "wiki");
        assert_eq!(config.index.as_deref(), Some("index.md"));
    }

    /// Without a `wiki/` sub-directory the root itself is used.
    #[test]
    fn auto_detect_flat_wiki() {
        let root = tempfile::tempdir().unwrap();
        std::fs::write(root.path().join("index.md"), "# Index").unwrap();

        let config = WikiConfig::auto_detect(root.path());
        assert_eq!(config.directories[0].path, ".");
    }

    /// `pattern` and `preset` are mutually exclusive.
    #[test]
    fn rejects_pattern_and_preset_together() {
        let err = resolve(
            r#"
[[rules]]
check = "citation-pattern"
name = "test"
dirs = ["wiki"]
pattern = "foo"
preset = "bold-method-year"
match_in = "wiki"
"#,
        )
        .unwrap_err();

        assert!(err.to_string().contains("cannot specify both"));
    }

    /// The error for an unknown preset names the offending preset.
    #[test]
    fn rejects_unknown_preset() {
        let err = resolve(
            r#"
[[rules]]
check = "citation-pattern"
name = "test"
dirs = ["wiki"]
preset = "nonexistent"
match_in = "wiki"
"#,
        )
        .unwrap_err();

        assert!(err.to_string().contains("nonexistent"));
    }

    /// A path matches its own directory and any ancestor, but not a sibling.
    #[test]
    fn matches_dirs_prefix() {
        let page = Path::new("wiki/concepts/GRPO.md");

        assert!(WikiConfig::matches_dirs(page, &["wiki/concepts".to_owned()]));
        assert!(WikiConfig::matches_dirs(page, &["wiki".to_owned()]));
        assert!(!WikiConfig::matches_dirs(
            Path::new("wiki/papers/foo.md"),
            &["wiki/concepts".to_owned()]
        ));
    }
}