1use std::collections::{HashMap, HashSet};
8use std::path::Path;
9
10use rayon::prelude::*;
11
12use crate::entity;
13use crate::registry;
14use crate::staleness;
15use crate::tags;
16use crate::verifier;
17use crate::{load_registry, load_tag_registry, parse_full, resolve_case_files, resolve_content_root};
18
19pub fn validate(path: Option<&str>, root: Option<&str>, strict: bool, quiet: bool) -> i32 {
32 let content_root = resolve_content_root(path, root);
33
34 let redirect_slugs = load_redirect_slugs(&content_root);
35 if !redirect_slugs.is_empty() {
36 eprintln!("redirects: {} entries loaded", redirect_slugs.len());
37 let collision_count = check_redirect_collisions(&redirect_slugs, &content_root);
38 if collision_count > 0 {
39 eprintln!(
40 "error: {collision_count} file(s) at old redirected paths — move them or remove the redirect entry"
41 );
42 return 1;
43 }
44 }
45
46 let reg = match load_registry(&content_root) {
47 Ok(r) => r,
48 Err(code) => return code,
49 };
50
51 let tag_reg = match load_tag_registry(&content_root) {
52 Ok(r) => r,
53 Err(code) => return code,
54 };
55
56 let case_files = match resolve_case_files(path, &content_root) {
57 Ok(f) => f,
58 Err(code) => return code,
59 };
60
61 if case_files.is_empty() {
62 eprintln!("no case files found");
63 return 1;
64 }
65
66 if !reg.is_empty() {
67 eprintln!("registry: {} entities loaded", reg.len());
68 }
69
70 if strict {
72 let filename_warnings = reg.check_filenames();
73 if !filename_warnings.is_empty() {
74 for w in &filename_warnings {
75 eprintln!("{w}");
76 }
77 eprintln!(
78 "error: {} filename warning(s) treated as errors (--strict)",
79 filename_warnings.len()
80 );
81 return 1;
82 }
83 }
84 if !tag_reg.is_empty() {
85 eprintln!(
86 "tags: {} tags loaded across {} categories",
87 tag_reg.len(),
88 tag_reg.category_slugs().len()
89 );
90 }
91
92 let mut entity_tag_errors = false;
93 for entry in reg.entries() {
94 let tag_errors = tag_reg.validate_tags(&entry.tags, 2);
95 for err in &tag_errors {
96 eprintln!("{}:{err}", entry.path.display());
97 }
98 if !tag_errors.is_empty() {
99 entity_tag_errors = true;
100 }
101 }
102
103 let results: Vec<ValidateResult> = case_files
104 .par_iter()
105 .map(|case_path| validate_single_case(case_path, ®, &tag_reg, quiet))
106 .collect();
107
108 let mut exit_code = i32::from(entity_tag_errors);
109 let mut all_events: Vec<(String, String)> = Vec::new();
110 let mut all_rel_ids: Vec<(String, String, usize)> = Vec::new();
111
112 for result in &results {
113 if result.exit_code != 0 {
114 exit_code = result.exit_code;
115 }
116 all_events.extend(result.events.iter().cloned());
117 all_rel_ids.extend(result.rel_ids.iter().cloned());
118 }
119
120 if let Some(code) = check_duplicate_event_names(&all_events) {
121 exit_code = code;
122 }
123
124 if let Some(code) = check_duplicate_rel_ids(&all_rel_ids) {
125 exit_code = code;
126 }
127
128 check_qualifier_consistency(®, strict, &mut exit_code);
129
130 let ok_count = results.iter().filter(|r| r.exit_code == 0).count();
132 let err_count = results.iter().filter(|r| r.exit_code != 0).count();
133 let total_entities: usize = results.iter().map(|r| r.entity_count).sum();
134 let total_rels: usize = results.iter().map(|r| r.rel_count).sum();
135 eprintln!(
136 "validate: {} case(s) ok, {} failed ({} entities, {} relationships)",
137 ok_count, err_count, total_entities, total_rels
138 );
139
140 exit_code
141}
142
/// Outcome of validating one case file.
struct ValidateResult {
    /// `0` when the case passed; non-zero when reading, parsing or tag
    /// validation failed.
    exit_code: i32,
    /// `(event name, case path)` for every event entity found in the case.
    events: Vec<(String, String)>,
    /// `(relationship id, case path, line)` for every relationship that has an id.
    rel_ids: Vec<(String, String, usize)>,
    /// Number of entities parsed from the case.
    entity_count: usize,
    /// Number of relationships parsed from the case.
    rel_count: usize,
}
150
151fn validate_single_case(
152 path: &str,
153 reg: ®istry::EntityRegistry,
154 tag_reg: &tags::TagRegistry,
155 quiet: bool,
156) -> ValidateResult {
157 let content = match std::fs::read_to_string(path) {
158 Ok(c) => c,
159 Err(e) => {
160 eprintln!("{path}: error reading file: {e}");
161 return ValidateResult {
162 exit_code: 2,
163 events: Vec::new(),
164 rel_ids: Vec::new(),
165 entity_count: 0,
166 rel_count: 0,
167 };
168 }
169 };
170
171 match parse_full(&content, Some(reg)) {
172 Ok((case, entities, rels)) => {
173 eprintln!(
174 "{path}: ok -- {id}: {title} ({ent} entities, {rel} relationships, {src} sources)",
175 id = case.id.as_deref().unwrap_or("(no id)"),
176 title = case.title,
177 ent = entities.len(),
178 rel = rels.len(),
179 src = case.sources.len(),
180 );
181 if !quiet {
182 if !case.summary.is_empty() {
183 eprintln!(
184 " summary: {}...",
185 &case.summary[..case.summary.len().min(80)]
186 );
187 }
188 for e in &entities {
189 let id_display = e.id.as_deref().unwrap_or("(no id)");
190 eprintln!(
191 " line {}: {id_display} {} ({}, {} fields)",
192 e.line, e.name, e.label, e.fields.len()
193 );
194 }
195 }
196
197 let events: Vec<(String, String)> = entities
198 .iter()
199 .filter(|e| e.label == entity::Label::Event)
200 .map(|e| (e.name.clone(), path.to_string()))
201 .collect();
202
203 for r in &rels {
204 if !quiet {
205 let id_display = r.id.as_deref().unwrap_or("(no id)");
206 eprintln!(
207 " line {}: {id_display} {} -> {}: {}",
208 r.line, r.source_name, r.target_name, r.rel_type,
209 );
210 }
211 }
212
213 let mut exit_code = 0;
214 let tag_errors = tag_reg.validate_tags(&case.tags, 2);
215 for err in &tag_errors {
216 eprintln!("{path}:{err}");
217 }
218 if !tag_errors.is_empty() {
219 exit_code = 1;
220 }
221
222 let rel_ids: Vec<(String, String, usize)> = rels
223 .iter()
224 .filter_map(|r| {
225 r.id.as_ref()
226 .map(|id| (id.clone(), path.to_string(), r.line))
227 })
228 .collect();
229
230 ValidateResult {
231 exit_code,
232 events,
233 rel_ids,
234 entity_count: entities.len(),
235 rel_count: rels.len(),
236 }
237 }
238 Err(errors) => {
239 for err in &errors {
240 eprintln!("{path}:{err}");
241 }
242 ValidateResult {
243 exit_code: 1,
244 events: Vec::new(),
245 rel_ids: Vec::new(),
246 entity_count: 0,
247 rel_count: 0,
248 }
249 }
250 }
251}
252
/// Reports event names that occur more than once across all cases.
///
/// `all_events` holds `(event name, case path)` pairs. Every repeat of a name
/// is logged to stderr together with the path where the name first appeared.
///
/// Returns `Some(1)` when at least one duplicate was found, `None` otherwise.
fn check_duplicate_event_names(all_events: &[(String, String)]) -> Option<i32> {
    use std::collections::hash_map::Entry;

    let mut first_occurrence: HashMap<&str, &str> = HashMap::new();
    let mut duplicate_found = false;

    for (name, path) in all_events {
        match first_occurrence.entry(name.as_str()) {
            Entry::Occupied(slot) => {
                let first_path = *slot.get();
                eprintln!(
                    "error: duplicate event name {name:?} in {path} (first defined in {first_path})"
                );
                duplicate_found = true;
            }
            Entry::Vacant(slot) => {
                slot.insert(path.as_str());
            }
        }
    }

    duplicate_found.then_some(1)
}
270
/// Reports relationship ids that occur more than once across all cases.
///
/// `all_rel_ids` holds `(id, case path, line)` triples. Every repeat of an id
/// is logged to stderr together with the location of its first definition.
///
/// Returns `Some(1)` when at least one duplicate was found, `None` otherwise.
fn check_duplicate_rel_ids(all_rel_ids: &[(String, String, usize)]) -> Option<i32> {
    use std::collections::hash_map::Entry;

    let mut first_occurrence: HashMap<&str, (&str, usize)> = HashMap::new();
    let mut duplicate_found = false;

    for (id, path, line) in all_rel_ids {
        match first_occurrence.entry(id.as_str()) {
            Entry::Occupied(slot) => {
                let (first_path, first_line) = *slot.get();
                eprintln!(
                    "error: duplicate relationship id {id:?} at {path}:{line} (first defined at {first_path}:{first_line})"
                );
                duplicate_found = true;
            }
            Entry::Vacant(slot) => {
                slot.insert((path.as_str(), *line));
            }
        }
    }

    duplicate_found.then_some(1)
}
288
289fn check_qualifier_consistency(reg: ®istry::EntityRegistry, strict: bool, exit_code: &mut i32) {
290 use entity::FieldValue;
291
292 let mut by_lower: HashMap<String, Vec<(String, String)>> = HashMap::new();
293
294 for entry in reg.entries() {
295 let qualifier = entry
296 .entity
297 .fields
298 .iter()
299 .find(|(k, _)| k == "qualifier")
300 .and_then(|(_, v)| match v {
301 FieldValue::Single(s) => Some(s.as_str()),
302 FieldValue::List(_) => None,
303 });
304
305 if let Some(q) = qualifier {
306 by_lower
307 .entry(q.to_lowercase())
308 .or_default()
309 .push((q.to_string(), entry.path.display().to_string()));
310 }
311 }
312
313 let mut inconsistencies = 0usize;
314 for occurrences in by_lower.values() {
315 let first = &occurrences[0].0;
316 let inconsistent: Vec<_> = occurrences.iter().filter(|(q, _)| q != first).collect();
317 if !inconsistent.is_empty() {
318 inconsistencies += 1;
319 eprintln!(
320 "warning: inconsistent qualifier casing for {:?}:",
321 occurrences[0].0
322 );
323 for (q, path) in occurrences {
324 eprintln!(" {path}: {q:?}");
325 }
326 }
327 }
328
329 if strict && inconsistencies > 0 {
330 eprintln!(
331 "error: {inconsistencies} qualifier consistency warning(s) treated as errors (--strict)"
332 );
333 *exit_code = 1;
334 }
335}
336
/// Collects the `from` slugs listed in `<content_root>/redirects.yaml`.
///
/// The file is scanned line by line rather than parsed as YAML: a line
/// contributes a slug when, after its indentation, it starts with `- from:`
/// followed by whitespace and a value. One pair of surrounding single or
/// double quotes is removed from the value; empty values are skipped.
///
/// Returns an empty set when the file is missing. Any other read error is
/// reported on stderr and also yields an empty set, so callers keep working
/// on a best-effort basis.
fn load_redirect_slugs(content_root: &Path) -> HashSet<String> {
    let path = content_root.join("redirects.yaml");
    let content = match std::fs::read_to_string(&path) {
        Ok(content) => content,
        Err(e) => {
            // A missing redirects file is normal; anything else deserves a hint.
            if e.kind() != std::io::ErrorKind::NotFound {
                eprintln!("{}: warning: could not read redirects: {e}", path.display());
            }
            return HashSet::new();
        }
    };

    let mut slugs = HashSet::new();
    for line in content.lines() {
        // Accept any indentation so the scan does not depend on how the YAML
        // list happens to be formatted.
        let Some(rest) = line.trim_start().strip_prefix("- from:") else {
            continue;
        };
        // YAML requires whitespace after the colon; this also rejects a bare
        // `- from:` with no value.
        if !rest.starts_with(char::is_whitespace) {
            continue;
        }

        let value = rest.trim();
        let value = value
            .strip_prefix('"')
            .and_then(|v| v.strip_suffix('"'))
            .or_else(|| value.strip_prefix('\'').and_then(|v| v.strip_suffix('\'')))
            .unwrap_or(value);

        if !value.is_empty() {
            slugs.insert(value.to_string());
        }
    }
    slugs
}
352
/// Counts redirect slugs for which `<content_root>/<slug>.md` still exists.
///
/// Each such file is reported on stderr. The return value is the number of
/// offending files; an empty slug set yields `0`.
fn check_redirect_collisions(redirect_slugs: &HashSet<String>, content_root: &Path) -> usize {
    let root_str = content_root.to_string_lossy();
    let mut collisions = 0usize;

    for slug in redirect_slugs {
        let candidate = content_root.join(format!("{slug}.md"));
        if !candidate.exists() {
            continue;
        }
        eprintln!("{root_str}/{slug}.md: error: file at old redirected path (see redirects.yaml)");
        collisions += 1;
    }

    collisions
}
373
/// Settings for the URL verification run started by `verify`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct VerifyConfig {
    /// Concurrency value forwarded to the URL verifier.
    pub concurrency: usize,
    /// Timeout forwarded to the URL verifier; logged with an `s` suffix,
    /// i.e. treated as seconds.
    pub timeout: u64,
    /// Optional path of the verification cache; `None` disables caching.
    pub cache_path: Option<String>,
    /// When `true`, URL errors are still printed but do not produce a
    /// non-zero exit code.
    pub warn_only: bool,
}
387
388pub fn verify(path: Option<&str>, root: Option<&str>, config: &VerifyConfig) -> i32 {
392 let content_root = resolve_content_root(path, root);
393 let reg = match load_registry(&content_root) {
394 Ok(r) => r,
395 Err(code) => return code,
396 };
397
398 let case_files = match resolve_case_files(path, &content_root) {
399 Ok(f) => f,
400 Err(code) => return code,
401 };
402
403 if case_files.is_empty() {
404 eprintln!("no case files found");
405 return 1;
406 }
407
408 let mut exit_code = 0;
409 for case_path in &case_files {
410 let result = verify_single_case(case_path, ®, config);
411 if result != 0 {
412 exit_code = result;
413 }
414 }
415
416 let reg_result = verify_registry_thumbnails(®, config);
417 if reg_result != 0 {
418 exit_code = reg_result;
419 }
420
421 exit_code
422}
423
424fn verify_single_case(
425 path: &str,
426 reg: ®istry::EntityRegistry,
427 config: &VerifyConfig,
428) -> i32 {
429 let content = match std::fs::read_to_string(path) {
430 Ok(c) => c,
431 Err(e) => {
432 eprintln!("{path}: error reading file: {e}");
433 return 2;
434 }
435 };
436
437 let (case, entities, rels) = match parse_full(&content, Some(reg)) {
438 Ok(result) => result,
439 Err(errors) => {
440 for err in &errors {
441 eprintln!("{path}:{err}");
442 }
443 return 1;
444 }
445 };
446
447 let mut collect_errors = Vec::new();
448 let urls = verifier::collect_urls(&case.sources, &entities, &rels, &mut collect_errors);
449
450 if !collect_errors.is_empty() {
451 for err in &collect_errors {
452 eprintln!("{path}:{err}");
453 }
454 return 1;
455 }
456
457 if urls.is_empty() {
458 eprintln!("{path}: no URLs to verify");
459 return 0;
460 }
461
462 run_url_verification(path, &urls, config)
463}
464
465fn verify_registry_thumbnails(
466 reg: ®istry::EntityRegistry,
467 config: &VerifyConfig,
468) -> i32 {
469 let urls = verifier::collect_registry_urls(reg);
470 if urls.is_empty() {
471 return 0;
472 }
473 run_url_verification("(registry)", &urls, config)
474}
475
476fn run_url_verification(
477 label: &str,
478 urls: &[verifier::UrlEntry],
479 config: &VerifyConfig,
480) -> i32 {
481 let mut verify_cache = load_verify_cache(label, config.cache_path.as_deref());
482
483 let (cached_results, urls_to_check) = partition_cached(urls, verify_cache.as_ref());
484 let check_count = urls_to_check.len();
485 let cached_count = cached_results.len();
486
487 if cached_count > 0 {
488 eprintln!(
489 "{label}: {cached_count} cached, {check_count} to check \
490 (concurrency={}, timeout={}s)",
491 config.concurrency, config.timeout
492 );
493 } else {
494 eprintln!(
495 "{label}: verifying {check_count} URLs \
496 (concurrency={}, timeout={}s)",
497 config.concurrency, config.timeout
498 );
499 }
500
501 let fresh_results = if urls_to_check.is_empty() {
502 Vec::new()
503 } else {
504 let rt = match tokio::runtime::Builder::new_current_thread()
505 .enable_all()
506 .build()
507 {
508 Ok(rt) => rt,
509 Err(e) => {
510 eprintln!("{label}: failed to create async runtime: {e}");
511 return 2;
512 }
513 };
514 rt.block_on(verifier::verify_urls(
515 urls_to_check,
516 config.concurrency,
517 config.timeout,
518 ))
519 };
520
521 if let Some(ref mut vc) = verify_cache {
522 for check in &fresh_results {
523 vc.put(&check.url, check.status, check.detail.as_deref());
524 }
525 }
526
527 let mut all_results = cached_results;
528 all_results.extend(fresh_results);
529
530 let has_error = print_verification_results(label, &all_results);
531
532 if let Some(ref vc) = verify_cache
533 && let Err(e) = vc.save()
534 {
535 eprintln!("{label}: cache save warning: {e}");
536 }
537
538 i32::from(has_error && !config.warn_only)
539}
540
541fn load_verify_cache(
542 label: &str,
543 cache_path: Option<&str>,
544) -> Option<crate::cache::VerifyCache> {
545 cache_path.map(|p| match crate::cache::VerifyCache::load(p) {
546 Ok(c) => {
547 eprintln!("{label}: using cache {p}");
548 c
549 }
550 Err(e) => {
551 eprintln!("{label}: cache load warning: {e}");
552 crate::cache::VerifyCache::load("/dev/null")
553 .unwrap_or_else(|_| crate::cache::VerifyCache::empty())
554 }
555 })
556}
557
558fn print_verification_results(label: &str, results: &[verifier::UrlCheck]) -> bool {
559 let mut has_error = false;
560
561 for check in results {
562 let detail = check.detail.as_deref().unwrap_or("");
563 match check.status {
564 verifier::CheckStatus::Ok => {
565 eprintln!(
566 " ok {}{}",
567 check.url,
568 if check.is_thumbnail {
569 " [thumbnail]"
570 } else {
571 ""
572 }
573 );
574 }
575 verifier::CheckStatus::Warn => {
576 eprintln!(" warn {} -- {detail}", check.url);
577 }
578 verifier::CheckStatus::Error => {
579 has_error = true;
580 eprintln!(" ERROR {} -- {detail}", check.url);
581 }
582 }
583 }
584
585 let ok_count = results
586 .iter()
587 .filter(|c| c.status == verifier::CheckStatus::Ok)
588 .count();
589 let warn_count = results
590 .iter()
591 .filter(|c| c.status == verifier::CheckStatus::Warn)
592 .count();
593 let err_count = results
594 .iter()
595 .filter(|c| c.status == verifier::CheckStatus::Error)
596 .count();
597
598 eprintln!("{label}: {ok_count} ok, {warn_count} warn, {err_count} error");
599
600 has_error
601}
602
603fn partition_cached(
604 urls: &[verifier::UrlEntry],
605 verify_cache: Option<&crate::cache::VerifyCache>,
606) -> (Vec<verifier::UrlCheck>, Vec<verifier::UrlEntry>) {
607 let Some(vc) = verify_cache else {
608 return (Vec::new(), urls.to_vec());
609 };
610
611 let mut cached = Vec::new();
612 let mut uncached = Vec::new();
613
614 for entry in urls {
615 if let Some(cache_entry) = vc.get(entry.url()) {
616 let status = match cache_entry.status.as_str() {
617 "ok" => verifier::CheckStatus::Ok,
618 "warn" => verifier::CheckStatus::Warn,
619 _ => verifier::CheckStatus::Error,
620 };
621 cached.push(verifier::UrlCheck {
622 url: entry.url().to_string(),
623 status,
624 detail: cache_entry.detail.clone(),
625 is_thumbnail: entry.is_thumbnail(),
626 });
627 } else {
628 uncached.push(entry.clone());
629 }
630 }
631
632 (cached, uncached)
633}
634
/// Month thresholds used by `check_staleness`.
///
/// The three values are copied verbatim into `staleness::Thresholds`; their
/// exact interpretation is defined by the `staleness` module.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct StalenessConfig {
    /// Threshold, in months, for the investigation stage.
    pub investigation_months: u32,
    /// Threshold, in months, for the trial stage.
    pub trial_months: u32,
    /// Threshold, in months, for the appeal stage.
    pub appeal_months: u32,
}

impl Default for StalenessConfig {
    /// Defaults: 6 months for investigations, 12 for trials and appeals.
    fn default() -> Self {
        Self {
            investigation_months: 6,
            trial_months: 12,
            appeal_months: 12,
        }
    }
}
656
657pub fn check_staleness(path: Option<&str>, root: Option<&str>, config: &StalenessConfig) -> i32 {
661 let content_root = resolve_content_root(path, root);
662 let reg = match load_registry(&content_root) {
663 Ok(r) => r,
664 Err(code) => return code,
665 };
666
667 let case_files = match resolve_case_files(path, &content_root) {
668 Ok(f) => f,
669 Err(code) => return code,
670 };
671
672 if case_files.is_empty() {
673 eprintln!("no case files found");
674 return 1;
675 }
676
677 let thresholds = staleness::Thresholds {
678 investigation_months: config.investigation_months,
679 trial_months: config.trial_months,
680 appeal_months: config.appeal_months,
681 };
682
683 let now = chrono_today();
684
685 let mut all_findings: Vec<(String, staleness::Finding)> = Vec::new();
686
687 for case_path in &case_files {
688 let content = match std::fs::read_to_string(case_path) {
689 Ok(c) => c,
690 Err(e) => {
691 eprintln!("{case_path}: error reading file: {e}");
692 continue;
693 }
694 };
695
696 let (case, entities, _rels) = match parse_full(&content, Some(®)) {
697 Ok(result) => result,
698 Err(errors) => {
699 for err in &errors {
700 eprintln!("{case_path}:{err}");
701 }
702 continue;
703 }
704 };
705
706 let findings = staleness::check_case(&case, &entities, &thresholds, now);
707 for finding in findings {
708 all_findings.push((case_path.clone(), finding));
709 }
710 }
711
712 all_findings.sort_by_key(|a| a.1.severity);
713
714 let mut errors = 0u32;
715 let mut warnings = 0u32;
716 let mut infos = 0u32;
717
718 for (path, finding) in &all_findings {
719 eprintln!("{}: {path}: {}", finding.severity, finding.message);
720 match finding.severity {
721 staleness::Severity::Error => errors += 1,
722 staleness::Severity::Warning => warnings += 1,
723 staleness::Severity::Info => infos += 1,
724 }
725 }
726
727 eprintln!(
728 "staleness: {errors} error(s), {warnings} warning(s), {infos} info(s) across {} case(s)",
729 case_files.len()
730 );
731
732 i32::from(errors > 0)
733}
734
735fn chrono_today() -> (i32, u32, u32) {
737 let now = std::time::SystemTime::now();
738 let since_epoch = now
739 .duration_since(std::time::UNIX_EPOCH)
740 .unwrap_or_default();
741 let days = since_epoch.as_secs() / 86400;
742 days_to_date(days)
743}
744
/// Converts a day count since 1970-01-01 into a `(year, month, day)` date in
/// the proleptic Gregorian calendar.
///
/// Day counts above `u32::MAX` are clamped to `u32::MAX`. The arithmetic is
/// the era-based `civil_from_days` scheme: years are counted from 1 March so
/// the leap day falls at the end of the shifted year.
#[allow(
    clippy::cast_possible_truncation,
    clippy::cast_sign_loss,
    clippy::cast_possible_wrap
)]
fn days_to_date(days: u64) -> (i32, u32, u32) {
    /// Days in one 400-year Gregorian cycle.
    const DAYS_PER_ERA: i64 = 146_097;
    /// Days from 0000-03-01 to 1970-01-01.
    const EPOCH_SHIFT: i64 = 719_468;

    let clamped = u32::try_from(days).unwrap_or(u32::MAX);
    let shifted = i64::from(clamped) + EPOCH_SHIFT;

    let era = shifted.div_euclid(DAYS_PER_ERA);
    let day_of_era = shifted.rem_euclid(DAYS_PER_ERA) as u32;
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);

    // Month index counted from March (0 = March, ..., 11 = February).
    let month_index = (5 * day_of_year + 2) / 153;
    let day = day_of_year - (153 * month_index + 2) / 5 + 1;
    let month = if month_index < 10 {
        month_index + 3
    } else {
        month_index - 9
    };

    // January and February belong to the following calendar year.
    let march_based_year = (i64::from(year_of_era) + era * 400) as i32;
    let year = march_based_year + i32::from(month <= 2);

    (year, month, day)
}
765
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn days_to_date_epoch() {
        assert_eq!(days_to_date(0), (1970, 1, 1));
    }

    #[test]
    fn days_to_date_known() {
        assert_eq!(days_to_date(20089), (2025, 1, 1));
    }

    // Leap days are where the March-based year arithmetic is easiest to break.
    #[test]
    fn days_to_date_leap_days() {
        assert_eq!(days_to_date(789), (1972, 2, 29));
        assert_eq!(days_to_date(11016), (2000, 2, 29));
        assert_eq!(days_to_date(19782), (2024, 2, 29));
    }

    #[test]
    fn days_to_date_month_and_year_boundaries() {
        assert_eq!(days_to_date(58), (1970, 2, 28));
        assert_eq!(days_to_date(59), (1970, 3, 1));
        assert_eq!(days_to_date(364), (1970, 12, 31));
        assert_eq!(days_to_date(20088), (2024, 12, 31));
    }

    #[test]
    fn days_to_date_clamps_to_u32_range() {
        assert_eq!(days_to_date(u64::MAX), days_to_date(u64::from(u32::MAX)));
    }

    #[test]
    fn redirect_slugs_empty_on_missing_file() {
        let slugs = load_redirect_slugs(Path::new("/nonexistent"));
        assert!(slugs.is_empty());
    }

    #[test]
    fn redirect_collisions_zero_for_empty_set() {
        let slugs = HashSet::new();
        assert_eq!(check_redirect_collisions(&slugs, Path::new("/nonexistent")), 0);
    }

    #[test]
    fn duplicate_event_names_detected() {
        let unique = vec![
            ("Raid".to_string(), "a.md".to_string()),
            ("Trial".to_string(), "b.md".to_string()),
        ];
        assert_eq!(check_duplicate_event_names(&unique), None);

        let repeated = vec![
            ("Raid".to_string(), "a.md".to_string()),
            ("Raid".to_string(), "b.md".to_string()),
        ];
        assert_eq!(check_duplicate_event_names(&repeated), Some(1));
    }

    #[test]
    fn duplicate_rel_ids_detected() {
        let unique = vec![
            ("r1".to_string(), "a.md".to_string(), 3),
            ("r2".to_string(), "a.md".to_string(), 9),
        ];
        assert_eq!(check_duplicate_rel_ids(&unique), None);

        let repeated = vec![
            ("r1".to_string(), "a.md".to_string(), 3),
            ("r1".to_string(), "b.md".to_string(), 12),
        ];
        assert_eq!(check_duplicate_rel_ids(&repeated), Some(1));
    }

    #[test]
    fn staleness_config_defaults() {
        let config = StalenessConfig::default();
        assert_eq!(config.investigation_months, 6);
        assert_eq!(config.trial_months, 12);
        assert_eq!(config.appeal_months, 12);
    }
}