1use std::collections::{HashMap, HashSet};
8use std::path::Path;
9
10use rayon::prelude::*;
11
12use crate::entity;
13use crate::registry;
14use crate::staleness;
15use crate::tags;
16use crate::verifier;
17use crate::{load_registry, load_tag_registry, parse_full, resolve_case_files, resolve_content_root};
18
19pub fn validate(path: Option<&str>, root: Option<&str>, strict: bool) -> i32 {
29 let content_root = resolve_content_root(path, root);
30
31 let redirect_slugs = load_redirect_slugs(&content_root);
32 if !redirect_slugs.is_empty() {
33 eprintln!("redirects: {} entries loaded", redirect_slugs.len());
34 let collision_count = check_redirect_collisions(&redirect_slugs, &content_root);
35 if collision_count > 0 {
36 eprintln!(
37 "error: {collision_count} file(s) at old redirected paths — move them or remove the redirect entry"
38 );
39 return 1;
40 }
41 }
42
43 let reg = match load_registry(&content_root) {
44 Ok(r) => r,
45 Err(code) => return code,
46 };
47
48 let tag_reg = match load_tag_registry(&content_root) {
49 Ok(r) => r,
50 Err(code) => return code,
51 };
52
53 let case_files = match resolve_case_files(path, &content_root) {
54 Ok(f) => f,
55 Err(code) => return code,
56 };
57
58 if case_files.is_empty() {
59 eprintln!("no case files found");
60 return 1;
61 }
62
63 if !reg.is_empty() {
64 eprintln!("registry: {} entities loaded", reg.len());
65 }
66
67 if strict {
69 let filename_warnings = reg.check_filenames();
70 if !filename_warnings.is_empty() {
71 for w in &filename_warnings {
72 eprintln!("{w}");
73 }
74 eprintln!(
75 "error: {} filename warning(s) treated as errors (--strict)",
76 filename_warnings.len()
77 );
78 return 1;
79 }
80 }
81 if !tag_reg.is_empty() {
82 eprintln!(
83 "tags: {} tags loaded across {} categories",
84 tag_reg.len(),
85 tag_reg.category_slugs().len()
86 );
87 }
88
89 let mut entity_tag_errors = false;
90 for entry in reg.entries() {
91 let tag_errors = tag_reg.validate_tags(&entry.tags, 2);
92 for err in &tag_errors {
93 eprintln!("{}:{err}", entry.path.display());
94 }
95 if !tag_errors.is_empty() {
96 entity_tag_errors = true;
97 }
98 }
99
100 let results: Vec<ValidateResult> = case_files
101 .par_iter()
102 .map(|case_path| validate_single_case(case_path, ®, &tag_reg))
103 .collect();
104
105 let mut exit_code = i32::from(entity_tag_errors);
106 let mut all_events: Vec<(String, String)> = Vec::new();
107 let mut all_rel_ids: Vec<(String, String, usize)> = Vec::new();
108
109 for result in results {
110 if result.exit_code != 0 {
111 exit_code = result.exit_code;
112 }
113 all_events.extend(result.events);
114 all_rel_ids.extend(result.rel_ids);
115 }
116
117 if let Some(code) = check_duplicate_event_names(&all_events) {
118 exit_code = code;
119 }
120
121 if let Some(code) = check_duplicate_rel_ids(&all_rel_ids) {
122 exit_code = code;
123 }
124
125 check_qualifier_consistency(®, strict, &mut exit_code);
126
127 exit_code
128}
129
/// Outcome of validating a single case file, collected so that cross-file
/// checks can run once every file has been processed.
struct ValidateResult {
    /// Per-file status: `0` on success, `1` when parsing or tag validation
    /// reported errors, `2` when the file could not be read.
    exit_code: i32,
    /// `(event name, case file path)` for every event entity in the file.
    events: Vec<(String, String)>,
    /// `(relationship id, case file path, line)` for every relationship that
    /// carries an id.
    rel_ids: Vec<(String, String, usize)>,
}
135
136fn validate_single_case(
137 path: &str,
138 reg: ®istry::EntityRegistry,
139 tag_reg: &tags::TagRegistry,
140) -> ValidateResult {
141 let content = match std::fs::read_to_string(path) {
142 Ok(c) => c,
143 Err(e) => {
144 eprintln!("{path}: error reading file: {e}");
145 return ValidateResult {
146 exit_code: 2,
147 events: Vec::new(),
148 rel_ids: Vec::new(),
149 };
150 }
151 };
152
153 match parse_full(&content, Some(reg)) {
154 Ok((case, entities, rels)) => {
155 eprintln!(
156 "{path}: ok -- {id}: {title} ({ent} entities, {rel} relationships, {src} sources)",
157 id = case.id.as_deref().unwrap_or("(no id)"),
158 title = case.title,
159 ent = entities.len(),
160 rel = rels.len(),
161 src = case.sources.len(),
162 );
163 if !case.summary.is_empty() {
164 eprintln!(
165 " summary: {}...",
166 &case.summary[..case.summary.len().min(80)]
167 );
168 }
169 for e in &entities {
170 let id_display = e.id.as_deref().unwrap_or("(no id)");
171 eprintln!(
172 " line {}: {id_display} {} ({}, {} fields)",
173 e.line, e.name, e.label, e.fields.len()
174 );
175 }
176
177 let events: Vec<(String, String)> = entities
178 .iter()
179 .filter(|e| e.label == entity::Label::Event)
180 .map(|e| (e.name.clone(), path.to_string()))
181 .collect();
182
183 for r in &rels {
184 let id_display = r.id.as_deref().unwrap_or("(no id)");
185 eprintln!(
186 " line {}: {id_display} {} -> {}: {}",
187 r.line, r.source_name, r.target_name, r.rel_type,
188 );
189 }
190
191 let mut exit_code = 0;
192 let tag_errors = tag_reg.validate_tags(&case.tags, 2);
193 for err in &tag_errors {
194 eprintln!("{path}:{err}");
195 }
196 if !tag_errors.is_empty() {
197 exit_code = 1;
198 }
199
200 let rel_ids: Vec<(String, String, usize)> = rels
201 .iter()
202 .filter_map(|r| {
203 r.id.as_ref()
204 .map(|id| (id.clone(), path.to_string(), r.line))
205 })
206 .collect();
207
208 ValidateResult {
209 exit_code,
210 events,
211 rel_ids,
212 }
213 }
214 Err(errors) => {
215 for err in &errors {
216 eprintln!("{path}:{err}");
217 }
218 ValidateResult {
219 exit_code: 1,
220 events: Vec::new(),
221 rel_ids: Vec::new(),
222 }
223 }
224 }
225}
226
/// Reports event names that occur more than once across all case files.
///
/// `all_events` holds `(event name, case file path)` pairs. Each repeat of a
/// name is logged with the path where the name was first seen.
///
/// Returns `Some(1)` when at least one duplicate was found, `None` otherwise.
fn check_duplicate_event_names(all_events: &[(String, String)]) -> Option<i32> {
    use std::collections::hash_map::Entry;

    let mut first_seen: HashMap<&str, &str> = HashMap::new();
    let mut has_duplicates = false;

    for (name, path) in all_events {
        match first_seen.entry(name.as_str()) {
            Entry::Occupied(prior) => {
                let first_path = *prior.get();
                eprintln!(
                    "error: duplicate event name {name:?} in {path} (first defined in {first_path})"
                );
                has_duplicates = true;
            }
            Entry::Vacant(slot) => {
                slot.insert(path.as_str());
            }
        }
    }

    has_duplicates.then_some(1)
}
244
/// Reports relationship ids that occur more than once across all case files.
///
/// `all_rel_ids` holds `(relationship id, case file path, line)` triples.
/// Each repeat of an id is logged with the location where it was first seen.
///
/// Returns `Some(1)` when at least one duplicate was found, `None` otherwise.
fn check_duplicate_rel_ids(all_rel_ids: &[(String, String, usize)]) -> Option<i32> {
    use std::collections::hash_map::Entry;

    let mut first_seen: HashMap<&str, (&str, usize)> = HashMap::new();
    let mut has_duplicates = false;

    for (id, path, line) in all_rel_ids {
        match first_seen.entry(id.as_str()) {
            Entry::Occupied(prior) => {
                let (first_path, first_line) = *prior.get();
                eprintln!(
                    "error: duplicate relationship id {id:?} at {path}:{line} (first defined at {first_path}:{first_line})"
                );
                has_duplicates = true;
            }
            Entry::Vacant(slot) => {
                slot.insert((path.as_str(), *line));
            }
        }
    }

    has_duplicates.then_some(1)
}
262
263fn check_qualifier_consistency(reg: ®istry::EntityRegistry, strict: bool, exit_code: &mut i32) {
264 use entity::FieldValue;
265
266 let mut by_lower: HashMap<String, Vec<(String, String)>> = HashMap::new();
267
268 for entry in reg.entries() {
269 let qualifier = entry
270 .entity
271 .fields
272 .iter()
273 .find(|(k, _)| k == "qualifier")
274 .and_then(|(_, v)| match v {
275 FieldValue::Single(s) => Some(s.as_str()),
276 FieldValue::List(_) => None,
277 });
278
279 if let Some(q) = qualifier {
280 by_lower
281 .entry(q.to_lowercase())
282 .or_default()
283 .push((q.to_string(), entry.path.display().to_string()));
284 }
285 }
286
287 let mut inconsistencies = 0usize;
288 for occurrences in by_lower.values() {
289 let first = &occurrences[0].0;
290 let inconsistent: Vec<_> = occurrences.iter().filter(|(q, _)| q != first).collect();
291 if !inconsistent.is_empty() {
292 inconsistencies += 1;
293 eprintln!(
294 "warning: inconsistent qualifier casing for {:?}:",
295 occurrences[0].0
296 );
297 for (q, path) in occurrences {
298 eprintln!(" {path}: {q:?}");
299 }
300 }
301 }
302
303 if strict && inconsistencies > 0 {
304 eprintln!(
305 "error: {inconsistencies} qualifier consistency warning(s) treated as errors (--strict)"
306 );
307 *exit_code = 1;
308 }
309}
310
/// Collects the `from` slugs listed in `<content_root>/redirects.yaml`.
///
/// The file is scanned line by line rather than parsed as YAML: only lines
/// starting with the exact list-item prefix are used, and the rest of such a
/// line is trimmed and stored. A missing or unreadable file yields an empty
/// set.
fn load_redirect_slugs(content_root: &Path) -> HashSet<String> {
    match std::fs::read_to_string(content_root.join("redirects.yaml")) {
        Ok(text) => text
            .lines()
            .filter_map(|line| line.strip_prefix(" - from: "))
            .map(|from| from.trim().to_string())
            .collect(),
        Err(_) => HashSet::new(),
    }
}
326
/// Counts redirected slugs that still have a file at their old location.
///
/// For each slug, `<content_root>/<slug>.md` is probed; every existing file
/// is reported on stderr. Returns the number of such files (`0` for an empty
/// slug set).
fn check_redirect_collisions(redirect_slugs: &HashSet<String>, content_root: &Path) -> usize {
    let root_str = content_root.to_string_lossy();

    redirect_slugs
        .iter()
        .filter(|slug| content_root.join(format!("{slug}.md")).exists())
        .inspect(|slug| {
            eprintln!(
                "{root_str}/{slug}.md: error: file at old redirected path (see redirects.yaml)"
            );
        })
        .count()
}
347
/// Settings for the URL verification command.
#[derive(Debug, Clone)]
pub struct VerifyConfig {
    /// Concurrency value handed to the URL verifier
    /// (presumably the maximum number of in-flight checks — confirm against `verifier`).
    pub concurrency: usize,
    /// Timeout handed to the URL verifier; log output reports it in seconds.
    pub timeout: u64,
    /// Path of the verification cache. With `None`, no cache is loaded or saved.
    pub cache_path: Option<String>,
    /// When `true`, failed URL checks are still printed but do not cause a
    /// non-zero exit status.
    pub warn_only: bool,
}
361
362pub fn verify(path: Option<&str>, root: Option<&str>, config: &VerifyConfig) -> i32 {
366 let content_root = resolve_content_root(path, root);
367 let reg = match load_registry(&content_root) {
368 Ok(r) => r,
369 Err(code) => return code,
370 };
371
372 let case_files = match resolve_case_files(path, &content_root) {
373 Ok(f) => f,
374 Err(code) => return code,
375 };
376
377 if case_files.is_empty() {
378 eprintln!("no case files found");
379 return 1;
380 }
381
382 let mut exit_code = 0;
383 for case_path in &case_files {
384 let result = verify_single_case(case_path, ®, config);
385 if result != 0 {
386 exit_code = result;
387 }
388 }
389
390 let reg_result = verify_registry_thumbnails(®, config);
391 if reg_result != 0 {
392 exit_code = reg_result;
393 }
394
395 exit_code
396}
397
398fn verify_single_case(
399 path: &str,
400 reg: ®istry::EntityRegistry,
401 config: &VerifyConfig,
402) -> i32 {
403 let content = match std::fs::read_to_string(path) {
404 Ok(c) => c,
405 Err(e) => {
406 eprintln!("{path}: error reading file: {e}");
407 return 2;
408 }
409 };
410
411 let (case, entities, rels) = match parse_full(&content, Some(reg)) {
412 Ok(result) => result,
413 Err(errors) => {
414 for err in &errors {
415 eprintln!("{path}:{err}");
416 }
417 return 1;
418 }
419 };
420
421 let mut collect_errors = Vec::new();
422 let urls = verifier::collect_urls(&case.sources, &entities, &rels, &mut collect_errors);
423
424 if !collect_errors.is_empty() {
425 for err in &collect_errors {
426 eprintln!("{path}:{err}");
427 }
428 return 1;
429 }
430
431 if urls.is_empty() {
432 eprintln!("{path}: no URLs to verify");
433 return 0;
434 }
435
436 run_url_verification(path, &urls, config)
437}
438
439fn verify_registry_thumbnails(
440 reg: ®istry::EntityRegistry,
441 config: &VerifyConfig,
442) -> i32 {
443 let urls = verifier::collect_registry_urls(reg);
444 if urls.is_empty() {
445 return 0;
446 }
447 run_url_verification("(registry)", &urls, config)
448}
449
450fn run_url_verification(
451 label: &str,
452 urls: &[verifier::UrlEntry],
453 config: &VerifyConfig,
454) -> i32 {
455 let mut verify_cache = load_verify_cache(label, config.cache_path.as_deref());
456
457 let (cached_results, urls_to_check) = partition_cached(urls, verify_cache.as_ref());
458 let check_count = urls_to_check.len();
459 let cached_count = cached_results.len();
460
461 if cached_count > 0 {
462 eprintln!(
463 "{label}: {cached_count} cached, {check_count} to check \
464 (concurrency={}, timeout={}s)",
465 config.concurrency, config.timeout
466 );
467 } else {
468 eprintln!(
469 "{label}: verifying {check_count} URLs \
470 (concurrency={}, timeout={}s)",
471 config.concurrency, config.timeout
472 );
473 }
474
475 let fresh_results = if urls_to_check.is_empty() {
476 Vec::new()
477 } else {
478 let rt = match tokio::runtime::Builder::new_current_thread()
479 .enable_all()
480 .build()
481 {
482 Ok(rt) => rt,
483 Err(e) => {
484 eprintln!("{label}: failed to create async runtime: {e}");
485 return 2;
486 }
487 };
488 rt.block_on(verifier::verify_urls(
489 urls_to_check,
490 config.concurrency,
491 config.timeout,
492 ))
493 };
494
495 if let Some(ref mut vc) = verify_cache {
496 for check in &fresh_results {
497 vc.put(&check.url, check.status, check.detail.as_deref());
498 }
499 }
500
501 let mut all_results = cached_results;
502 all_results.extend(fresh_results);
503
504 let has_error = print_verification_results(label, &all_results);
505
506 if let Some(ref vc) = verify_cache
507 && let Err(e) = vc.save()
508 {
509 eprintln!("{label}: cache save warning: {e}");
510 }
511
512 i32::from(has_error && !config.warn_only)
513}
514
515fn load_verify_cache(
516 label: &str,
517 cache_path: Option<&str>,
518) -> Option<crate::cache::VerifyCache> {
519 cache_path.map(|p| match crate::cache::VerifyCache::load(p) {
520 Ok(c) => {
521 eprintln!("{label}: using cache {p}");
522 c
523 }
524 Err(e) => {
525 eprintln!("{label}: cache load warning: {e}");
526 crate::cache::VerifyCache::load("/dev/null")
527 .unwrap_or_else(|_| crate::cache::VerifyCache::empty())
528 }
529 })
530}
531
532fn print_verification_results(label: &str, results: &[verifier::UrlCheck]) -> bool {
533 let mut has_error = false;
534
535 for check in results {
536 let detail = check.detail.as_deref().unwrap_or("");
537 match check.status {
538 verifier::CheckStatus::Ok => {
539 eprintln!(
540 " ok {}{}",
541 check.url,
542 if check.is_thumbnail {
543 " [thumbnail]"
544 } else {
545 ""
546 }
547 );
548 }
549 verifier::CheckStatus::Warn => {
550 eprintln!(" warn {} -- {detail}", check.url);
551 }
552 verifier::CheckStatus::Error => {
553 has_error = true;
554 eprintln!(" ERROR {} -- {detail}", check.url);
555 }
556 }
557 }
558
559 let ok_count = results
560 .iter()
561 .filter(|c| c.status == verifier::CheckStatus::Ok)
562 .count();
563 let warn_count = results
564 .iter()
565 .filter(|c| c.status == verifier::CheckStatus::Warn)
566 .count();
567 let err_count = results
568 .iter()
569 .filter(|c| c.status == verifier::CheckStatus::Error)
570 .count();
571
572 eprintln!("{label}: {ok_count} ok, {warn_count} warn, {err_count} error");
573
574 has_error
575}
576
577fn partition_cached(
578 urls: &[verifier::UrlEntry],
579 verify_cache: Option<&crate::cache::VerifyCache>,
580) -> (Vec<verifier::UrlCheck>, Vec<verifier::UrlEntry>) {
581 let Some(vc) = verify_cache else {
582 return (Vec::new(), urls.to_vec());
583 };
584
585 let mut cached = Vec::new();
586 let mut uncached = Vec::new();
587
588 for entry in urls {
589 if let Some(cache_entry) = vc.get(entry.url()) {
590 let status = match cache_entry.status.as_str() {
591 "ok" => verifier::CheckStatus::Ok,
592 "warn" => verifier::CheckStatus::Warn,
593 _ => verifier::CheckStatus::Error,
594 };
595 cached.push(verifier::UrlCheck {
596 url: entry.url().to_string(),
597 status,
598 detail: cache_entry.detail.clone(),
599 is_thumbnail: entry.is_thumbnail(),
600 });
601 } else {
602 uncached.push(entry.clone());
603 }
604 }
605
606 (cached, uncached)
607}
608
/// Month thresholds used by the staleness check.
///
/// The three values are copied unchanged into `staleness::Thresholds`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct StalenessConfig {
    /// Threshold, in months, for the investigation stage.
    pub investigation_months: u32,
    /// Threshold, in months, for the trial stage.
    pub trial_months: u32,
    /// Threshold, in months, for the appeal stage.
    pub appeal_months: u32,
}

impl Default for StalenessConfig {
    /// Defaults: 6 months for investigations, 12 for trials, 12 for appeals.
    fn default() -> Self {
        Self {
            investigation_months: 6,
            trial_months: 12,
            appeal_months: 12,
        }
    }
}
630
631pub fn check_staleness(path: Option<&str>, root: Option<&str>, config: &StalenessConfig) -> i32 {
635 let content_root = resolve_content_root(path, root);
636 let reg = match load_registry(&content_root) {
637 Ok(r) => r,
638 Err(code) => return code,
639 };
640
641 let case_files = match resolve_case_files(path, &content_root) {
642 Ok(f) => f,
643 Err(code) => return code,
644 };
645
646 if case_files.is_empty() {
647 eprintln!("no case files found");
648 return 1;
649 }
650
651 let thresholds = staleness::Thresholds {
652 investigation_months: config.investigation_months,
653 trial_months: config.trial_months,
654 appeal_months: config.appeal_months,
655 };
656
657 let now = chrono_today();
658
659 let mut all_findings: Vec<(String, staleness::Finding)> = Vec::new();
660
661 for case_path in &case_files {
662 let content = match std::fs::read_to_string(case_path) {
663 Ok(c) => c,
664 Err(e) => {
665 eprintln!("{case_path}: error reading file: {e}");
666 continue;
667 }
668 };
669
670 let (case, entities, _rels) = match parse_full(&content, Some(®)) {
671 Ok(result) => result,
672 Err(errors) => {
673 for err in &errors {
674 eprintln!("{case_path}:{err}");
675 }
676 continue;
677 }
678 };
679
680 let findings = staleness::check_case(&case, &entities, &thresholds, now);
681 for finding in findings {
682 all_findings.push((case_path.clone(), finding));
683 }
684 }
685
686 all_findings.sort_by_key(|a| a.1.severity);
687
688 let mut errors = 0u32;
689 let mut warnings = 0u32;
690 let mut infos = 0u32;
691
692 for (path, finding) in &all_findings {
693 eprintln!("{}: {path}: {}", finding.severity, finding.message);
694 match finding.severity {
695 staleness::Severity::Error => errors += 1,
696 staleness::Severity::Warning => warnings += 1,
697 staleness::Severity::Info => infos += 1,
698 }
699 }
700
701 eprintln!(
702 "staleness: {errors} error(s), {warnings} warning(s), {infos} info(s) across {} case(s)",
703 case_files.len()
704 );
705
706 i32::from(errors > 0)
707}
708
709fn chrono_today() -> (i32, u32, u32) {
711 let now = std::time::SystemTime::now();
712 let since_epoch = now
713 .duration_since(std::time::UNIX_EPOCH)
714 .unwrap_or_default();
715 let days = since_epoch.as_secs() / 86400;
716 days_to_date(days)
717}
718
/// Converts a count of days since 1970-01-01 into a proleptic Gregorian
/// `(year, month, day)` triple.
///
/// Inputs larger than `u32::MAX` are clamped to `u32::MAX`. Years are counted
/// from 1 March internally so the leap day falls at the end of the year; the
/// final step maps back to calendar years.
// The casts are safe: day-of-era lies in 0..146_097 and the year stays far
// below `i32::MAX` because the input is clamped to `u32::MAX` days.
#[allow(
    clippy::cast_possible_truncation,
    clippy::cast_sign_loss,
    clippy::cast_possible_wrap
)]
fn days_to_date(days: u64) -> (i32, u32, u32) {
    // Days in one 400-year Gregorian cycle.
    const DAYS_PER_ERA: i64 = 146_097;
    // Days from 0000-03-01 to 1970-01-01.
    const EPOCH_SHIFT: i64 = 719_468;

    let clamped_days = u32::try_from(days).unwrap_or(u32::MAX);
    let shifted = i64::from(clamped_days) + EPOCH_SHIFT;

    // Floor division and the matching non-negative remainder.
    let era = shifted.div_euclid(DAYS_PER_ERA);
    let day_of_era = shifted.rem_euclid(DAYS_PER_ERA) as u32;

    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);

    // Month index counted from March (0 = March, 11 = February).
    let march_month = (5 * day_of_year + 2) / 153;
    let day = day_of_year - (153 * march_month + 2) / 5 + 1;
    let month = if march_month < 10 {
        march_month + 3
    } else {
        march_month - 9
    };

    // January and February belong to the next calendar year.
    let march_year = (i64::from(year_of_era) + era * 400) as i32;
    let year = if month <= 2 { march_year + 1 } else { march_year };

    (year, month, day)
}
739
#[cfg(test)]
mod tests {
    use super::*;

    /// Day zero is the Unix epoch itself.
    #[test]
    fn days_to_date_epoch() {
        let (year, month, day) = days_to_date(0);
        assert_eq!((year, month, day), (1970, 1, 1));
    }

    /// 20089 days after the epoch is the first day of 2025.
    #[test]
    fn days_to_date_known() {
        let (year, month, day) = days_to_date(20089);
        assert_eq!((year, month, day), (2025, 1, 1));
    }

    /// A content root without `redirects.yaml` yields no slugs.
    #[test]
    fn redirect_slugs_empty_on_missing_file() {
        let missing_root = Path::new("/nonexistent");
        assert!(load_redirect_slugs(missing_root).is_empty());
    }

    /// The default thresholds are 6 / 12 / 12 months.
    #[test]
    fn staleness_config_defaults() {
        let StalenessConfig {
            investigation_months,
            trial_months,
            appeal_months,
        } = StalenessConfig::default();
        assert_eq!(investigation_months, 6);
        assert_eq!(trial_months, 12);
        assert_eq!(appeal_months, 12);
    }
}