use std::collections::{HashMap, HashSet};
use std::path::Path;
use rayon::prelude::*;
use crate::entity;
use crate::registry;
use crate::staleness;
use crate::tags;
use crate::verifier;
use crate::{load_registry, load_tag_registry, parse_full, resolve_case_files, resolve_content_root};
/// Validates every case file under the resolved content root, reporting to stderr.
///
/// Steps, in order:
/// 1. load the `redirects.yaml` slugs and fail if a file still sits at a redirected path;
/// 2. load the entity and tag registries and resolve the list of case files;
/// 3. with `strict`, treat registry filename warnings as errors;
/// 4. validate the tags of every registry entry;
/// 5. validate each case file in parallel (`quiet` suppresses the per-item detail);
/// 6. check for duplicate event names and relationship ids across all cases, and for
///    inconsistent qualifier casing in the registry.
///
/// Returns the process exit code: `0` when everything passed, non-zero otherwise.
/// Error codes produced by the registry / case-file loaders are returned unchanged;
/// redirect collisions, an empty case list and strict filename warnings return `1`.
pub fn validate(path: Option<&str>, root: Option<&str>, strict: bool, quiet: bool) -> i32 {
    let content_root = resolve_content_root(path, root);

    // A file that still lives at an old, redirected path is a hard error.
    let redirect_slugs = load_redirect_slugs(&content_root);
    if !redirect_slugs.is_empty() {
        eprintln!("redirects: {} entries loaded", redirect_slugs.len());
        let collision_count = check_redirect_collisions(&redirect_slugs, &content_root);
        if collision_count > 0 {
            eprintln!(
                "error: {collision_count} file(s) at old redirected paths — move them or remove the redirect entry"
            );
            return 1;
        }
    }

    let reg = match load_registry(&content_root) {
        Ok(r) => r,
        Err(code) => return code,
    };
    let tag_reg = match load_tag_registry(&content_root) {
        Ok(r) => r,
        Err(code) => return code,
    };
    let case_files = match resolve_case_files(path, &content_root) {
        Ok(f) => f,
        Err(code) => return code,
    };
    if case_files.is_empty() {
        eprintln!("no case files found");
        return 1;
    }

    if !reg.is_empty() {
        eprintln!("registry: {} entities loaded", reg.len());
    }

    // Filename warnings only abort the run in strict mode.
    if strict {
        let filename_warnings = reg.check_filenames();
        if !filename_warnings.is_empty() {
            for w in &filename_warnings {
                eprintln!("{w}");
            }
            eprintln!(
                "error: {} filename warning(s) treated as errors (--strict)",
                filename_warnings.len()
            );
            return 1;
        }
    }

    if !tag_reg.is_empty() {
        eprintln!(
            "tags: {} tags loaded across {} categories",
            tag_reg.len(),
            tag_reg.category_slugs().len()
        );
    }

    // Tag errors on registry entries do not stop the run; they seed the exit code.
    let mut entity_tag_errors = false;
    for entry in reg.entries() {
        let tag_errors = tag_reg.validate_tags(&entry.tags, 2);
        for err in &tag_errors {
            eprintln!("{}:{err}", entry.path.display());
        }
        if !tag_errors.is_empty() {
            entity_tag_errors = true;
        }
    }

    // Case files are independent of each other, so they are validated in parallel.
    let results: Vec<ValidateResult> = case_files
        .par_iter()
        .map(|case_path| validate_single_case(case_path, &reg, &tag_reg, quiet))
        .collect();

    let mut exit_code = i32::from(entity_tag_errors);
    let mut all_events: Vec<(String, String)> = Vec::new();
    let mut all_rel_ids: Vec<(String, String, usize)> = Vec::new();
    for result in &results {
        if result.exit_code != 0 {
            exit_code = result.exit_code;
        }
        all_events.extend(result.events.iter().cloned());
        all_rel_ids.extend(result.rel_ids.iter().cloned());
    }

    // Cross-case checks need the data gathered from every file.
    if let Some(code) = check_duplicate_event_names(&all_events) {
        exit_code = code;
    }
    if let Some(code) = check_duplicate_rel_ids(&all_rel_ids) {
        exit_code = code;
    }
    check_qualifier_consistency(&reg, strict, &mut exit_code);

    let ok_count = results.iter().filter(|r| r.exit_code == 0).count();
    let err_count = results.iter().filter(|r| r.exit_code != 0).count();
    let total_entities: usize = results.iter().map(|r| r.entity_count).sum();
    let total_rels: usize = results.iter().map(|r| r.rel_count).sum();
    eprintln!(
        "validate: {} case(s) ok, {} failed ({} entities, {} relationships)",
        ok_count, err_count, total_entities, total_rels
    );
    exit_code
}
/// Outcome of validating a single case file (built by `validate_single_case`).
struct ValidateResult {
/// `0` on success, `2` when the file could not be read, `1` on parse or tag errors.
exit_code: i32,
/// `(event name, case path)` for every parsed entity whose label is `Event`.
events: Vec<(String, String)>,
/// `(relationship id, case path, line)` for every parsed relationship that has an id.
rel_ids: Vec<(String, String, usize)>,
/// Number of entities parsed from the file (`0` when reading or parsing failed).
entity_count: usize,
/// Number of relationships parsed from the file (`0` when reading or parsing failed).
rel_count: usize,
}
fn validate_single_case(
path: &str,
reg: ®istry::EntityRegistry,
tag_reg: &tags::TagRegistry,
quiet: bool,
) -> ValidateResult {
let content = match std::fs::read_to_string(path) {
Ok(c) => c,
Err(e) => {
eprintln!("{path}: error reading file: {e}");
return ValidateResult {
exit_code: 2,
events: Vec::new(),
rel_ids: Vec::new(),
entity_count: 0,
rel_count: 0,
};
}
};
match parse_full(&content, Some(reg)) {
Ok((case, entities, rels)) => {
eprintln!(
"{path}: ok -- {id}: {title} ({ent} entities, {rel} relationships, {src} sources)",
id = case.id.as_deref().unwrap_or("(no id)"),
title = case.title,
ent = entities.len(),
rel = rels.len(),
src = case.sources.len(),
);
if !quiet {
if !case.summary.is_empty() {
eprintln!(
" summary: {}...",
&case.summary[..case.summary.len().min(80)]
);
}
for e in &entities {
let id_display = e.id.as_deref().unwrap_or("(no id)");
eprintln!(
" line {}: {id_display} {} ({}, {} fields)",
e.line, e.name, e.label, e.fields.len()
);
}
}
let events: Vec<(String, String)> = entities
.iter()
.filter(|e| e.label == entity::Label::Event)
.map(|e| (e.name.clone(), path.to_string()))
.collect();
for r in &rels {
if !quiet {
let id_display = r.id.as_deref().unwrap_or("(no id)");
eprintln!(
" line {}: {id_display} {} -> {}: {}",
r.line, r.source_name, r.target_name, r.rel_type,
);
}
}
let mut exit_code = 0;
let tag_errors = tag_reg.validate_tags(&case.tags, 2);
for err in &tag_errors {
eprintln!("{path}:{err}");
}
if !tag_errors.is_empty() {
exit_code = 1;
}
let rel_ids: Vec<(String, String, usize)> = rels
.iter()
.filter_map(|r| {
r.id.as_ref()
.map(|id| (id.clone(), path.to_string(), r.line))
})
.collect();
ValidateResult {
exit_code,
events,
rel_ids,
entity_count: entities.len(),
rel_count: rels.len(),
}
}
Err(errors) => {
for err in &errors {
eprintln!("{path}:{err}");
}
ValidateResult {
exit_code: 1,
events: Vec::new(),
rel_ids: Vec::new(),
entity_count: 0,
rel_count: 0,
}
}
}
}
/// Reports event names that occur more than once across all cases.
///
/// `all_events` holds `(event name, case path)` pairs. The first path seen for a name
/// is remembered; every later occurrence of that name is printed to stderr as an
/// error that cites the first path.
///
/// Returns `Some(1)` if at least one duplicate was found, `None` otherwise.
fn check_duplicate_event_names(all_events: &[(String, String)]) -> Option<i32> {
    use std::collections::hash_map::Entry;

    let mut first_seen: HashMap<&str, &str> = HashMap::new();
    let mut duplicate_found = false;

    for (name, path) in all_events {
        match first_seen.entry(name.as_str()) {
            Entry::Occupied(slot) => {
                let first_path = *slot.get();
                eprintln!(
                    "error: duplicate event name {name:?} in {path} (first defined in {first_path})"
                );
                duplicate_found = true;
            }
            Entry::Vacant(slot) => {
                slot.insert(path.as_str());
            }
        }
    }

    duplicate_found.then_some(1)
}
/// Reports relationship ids that occur more than once across all cases.
///
/// `all_rel_ids` holds `(id, case path, line)` triples. The first location seen for
/// an id is remembered; every later occurrence is printed to stderr as an error that
/// cites both locations.
///
/// Returns `Some(1)` if at least one duplicate was found, `None` otherwise.
fn check_duplicate_rel_ids(all_rel_ids: &[(String, String, usize)]) -> Option<i32> {
    use std::collections::hash_map::Entry;

    let mut first_seen: HashMap<&str, (&str, usize)> = HashMap::new();
    let mut duplicate_count = 0usize;

    for (id, path, line) in all_rel_ids {
        match first_seen.entry(id.as_str()) {
            Entry::Occupied(slot) => {
                let (first_path, first_line) = *slot.get();
                eprintln!(
                    "error: duplicate relationship id {id:?} at {path}:{line} (first defined at {first_path}:{first_line})"
                );
                duplicate_count += 1;
            }
            Entry::Vacant(slot) => {
                slot.insert((path.as_str(), *line));
            }
        }
    }

    (duplicate_count > 0).then_some(1)
}
fn check_qualifier_consistency(reg: ®istry::EntityRegistry, strict: bool, exit_code: &mut i32) {
use entity::FieldValue;
let mut by_lower: HashMap<String, Vec<(String, String)>> = HashMap::new();
for entry in reg.entries() {
let qualifier = entry
.entity
.fields
.iter()
.find(|(k, _)| k == "qualifier")
.and_then(|(_, v)| match v {
FieldValue::Single(s) => Some(s.as_str()),
FieldValue::List(_) => None,
});
if let Some(q) = qualifier {
by_lower
.entry(q.to_lowercase())
.or_default()
.push((q.to_string(), entry.path.display().to_string()));
}
}
let mut inconsistencies = 0usize;
for occurrences in by_lower.values() {
let first = &occurrences[0].0;
let inconsistent: Vec<_> = occurrences.iter().filter(|(q, _)| q != first).collect();
if !inconsistent.is_empty() {
inconsistencies += 1;
eprintln!(
"warning: inconsistent qualifier casing for {:?}:",
occurrences[0].0
);
for (q, path) in occurrences {
eprintln!(" {path}: {q:?}");
}
}
}
if strict && inconsistencies > 0 {
eprintln!(
"error: {inconsistencies} qualifier consistency warning(s) treated as errors (--strict)"
);
*exit_code = 1;
}
}
/// Collects the `from` slugs listed in `<content_root>/redirects.yaml`.
///
/// The file is scanned line by line rather than parsed as YAML: a line contributes a
/// slug only if it starts with the exact prefix matched below, and the rest of the
/// line (trimmed) is the slug. Any read error, including a missing file, yields an
/// empty set.
fn load_redirect_slugs(content_root: &Path) -> HashSet<String> {
    match std::fs::read_to_string(content_root.join("redirects.yaml")) {
        Ok(text) => text
            .lines()
            .filter_map(|line| line.strip_prefix(" - from: "))
            .map(|from| from.trim().to_string())
            .collect(),
        Err(_) => HashSet::new(),
    }
}
/// Counts redirect slugs for which `<content_root>/<slug>.md` still exists.
///
/// Each such file is reported on stderr. Returns the number of files found, which is
/// `0` for an empty slug set.
fn check_redirect_collisions(redirect_slugs: &HashSet<String>, content_root: &Path) -> usize {
    let root_str = content_root.to_string_lossy();
    let mut collisions = 0;

    let stale = redirect_slugs
        .iter()
        .filter(|slug| content_root.join(format!("{slug}.md")).exists());
    for slug in stale {
        eprintln!(
            "{root_str}/{slug}.md: error: file at old redirected path (see redirects.yaml)"
        );
        collisions += 1;
    }

    collisions
}
/// Options for the `verify` command.
pub struct VerifyConfig {
/// Passed through to `verifier::verify_urls` as its concurrency argument.
pub concurrency: usize,
/// Passed through to `verifier::verify_urls`; logged with an `s` suffix, so presumably seconds.
pub timeout: u64,
/// Path of the verification cache file; `None` disables the cache.
pub cache_path: Option<String>,
/// When `true`, URL errors are still printed but do not produce a non-zero exit code.
pub warn_only: bool,
}
/// Verifies the URLs referenced by every case file and by the entity registry.
///
/// Case files are processed one after another, followed by the registry's own URLs.
/// Error codes from the registry / case-file loaders are returned unchanged, and an
/// empty case list returns `1`. Otherwise the result is `0` if every step succeeded,
/// or the exit code of the last step that failed.
pub fn verify(path: Option<&str>, root: Option<&str>, config: &VerifyConfig) -> i32 {
    let content_root = resolve_content_root(path, root);
    let reg = match load_registry(&content_root) {
        Ok(r) => r,
        Err(code) => return code,
    };
    let case_files = match resolve_case_files(path, &content_root) {
        Ok(f) => f,
        Err(code) => return code,
    };
    if case_files.is_empty() {
        eprintln!("no case files found");
        return 1;
    }

    // Keep going after a failure so every case gets reported.
    let mut exit_code = 0;
    for case_path in &case_files {
        let result = verify_single_case(case_path, &reg, config);
        if result != 0 {
            exit_code = result;
        }
    }

    let reg_result = verify_registry_thumbnails(&reg, config);
    if reg_result != 0 {
        exit_code = reg_result;
    }
    exit_code
}
fn verify_single_case(
path: &str,
reg: ®istry::EntityRegistry,
config: &VerifyConfig,
) -> i32 {
let content = match std::fs::read_to_string(path) {
Ok(c) => c,
Err(e) => {
eprintln!("{path}: error reading file: {e}");
return 2;
}
};
let (case, entities, rels) = match parse_full(&content, Some(reg)) {
Ok(result) => result,
Err(errors) => {
for err in &errors {
eprintln!("{path}:{err}");
}
return 1;
}
};
let mut collect_errors = Vec::new();
let urls = verifier::collect_urls(&case.sources, &entities, &rels, &mut collect_errors);
if !collect_errors.is_empty() {
for err in &collect_errors {
eprintln!("{path}:{err}");
}
return 1;
}
if urls.is_empty() {
eprintln!("{path}: no URLs to verify");
return 0;
}
run_url_verification(path, &urls, config)
}
fn verify_registry_thumbnails(
reg: ®istry::EntityRegistry,
config: &VerifyConfig,
) -> i32 {
let urls = verifier::collect_registry_urls(reg);
if urls.is_empty() {
return 0;
}
run_url_verification("(registry)", &urls, config)
}
fn run_url_verification(
label: &str,
urls: &[verifier::UrlEntry],
config: &VerifyConfig,
) -> i32 {
let mut verify_cache = load_verify_cache(label, config.cache_path.as_deref());
let (cached_results, urls_to_check) = partition_cached(urls, verify_cache.as_ref());
let check_count = urls_to_check.len();
let cached_count = cached_results.len();
if cached_count > 0 {
eprintln!(
"{label}: {cached_count} cached, {check_count} to check \
(concurrency={}, timeout={}s)",
config.concurrency, config.timeout
);
} else {
eprintln!(
"{label}: verifying {check_count} URLs \
(concurrency={}, timeout={}s)",
config.concurrency, config.timeout
);
}
let fresh_results = if urls_to_check.is_empty() {
Vec::new()
} else {
let rt = match tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()
{
Ok(rt) => rt,
Err(e) => {
eprintln!("{label}: failed to create async runtime: {e}");
return 2;
}
};
rt.block_on(verifier::verify_urls(
urls_to_check,
config.concurrency,
config.timeout,
))
};
if let Some(ref mut vc) = verify_cache {
for check in &fresh_results {
vc.put(&check.url, check.status, check.detail.as_deref());
}
}
let mut all_results = cached_results;
all_results.extend(fresh_results);
let has_error = print_verification_results(label, &all_results);
if let Some(ref vc) = verify_cache
&& let Err(e) = vc.save()
{
eprintln!("{label}: cache save warning: {e}");
}
i32::from(has_error && !config.warn_only)
}
fn load_verify_cache(
label: &str,
cache_path: Option<&str>,
) -> Option<crate::cache::VerifyCache> {
cache_path.map(|p| match crate::cache::VerifyCache::load(p) {
Ok(c) => {
eprintln!("{label}: using cache {p}");
c
}
Err(e) => {
eprintln!("{label}: cache load warning: {e}");
crate::cache::VerifyCache::load("/dev/null")
.unwrap_or_else(|_| crate::cache::VerifyCache::empty())
}
})
}
/// Prints one line per URL check, then a summary line with per-status totals.
///
/// `label` prefixes the summary line. Returns `true` if at least one check has
/// status `Error`.
fn print_verification_results(label: &str, results: &[verifier::UrlCheck]) -> bool {
    let (mut ok_count, mut warn_count, mut err_count) = (0usize, 0usize, 0usize);

    for check in results {
        let detail = check.detail.as_deref().unwrap_or("");
        match check.status {
            verifier::CheckStatus::Ok => {
                ok_count += 1;
                let thumbnail_tag = if check.is_thumbnail {
                    " [thumbnail]"
                } else {
                    ""
                };
                eprintln!(" ok {}{}", check.url, thumbnail_tag);
            }
            verifier::CheckStatus::Warn => {
                warn_count += 1;
                eprintln!(" warn {} -- {detail}", check.url);
            }
            verifier::CheckStatus::Error => {
                err_count += 1;
                eprintln!(" ERROR {} -- {detail}", check.url);
            }
        }
    }

    eprintln!("{label}: {ok_count} ok, {warn_count} warn, {err_count} error");
    err_count > 0
}
/// Splits `urls` into those already answered by the cache and those still to check.
///
/// Returns `(cached results, entries to check)`. Without a cache every entry goes
/// into the second list. A cached status string of `"ok"` or `"warn"` maps to the
/// matching `CheckStatus`; any other string is treated as `Error`.
fn partition_cached(
    urls: &[verifier::UrlEntry],
    verify_cache: Option<&crate::cache::VerifyCache>,
) -> (Vec<verifier::UrlCheck>, Vec<verifier::UrlEntry>) {
    let Some(cache) = verify_cache else {
        return (Vec::new(), urls.to_vec());
    };

    let mut hits = Vec::new();
    let mut misses = Vec::new();
    for entry in urls {
        match cache.get(entry.url()) {
            None => misses.push(entry.clone()),
            Some(stored) => {
                let status = match stored.status.as_str() {
                    "ok" => verifier::CheckStatus::Ok,
                    "warn" => verifier::CheckStatus::Warn,
                    _ => verifier::CheckStatus::Error,
                };
                hits.push(verifier::UrlCheck {
                    url: entry.url().to_string(),
                    status,
                    detail: stored.detail.clone(),
                    is_thumbnail: entry.is_thumbnail(),
                });
            }
        }
    }
    (hits, misses)
}
/// Thresholds for the staleness check; `check_staleness` copies each field into the
/// `staleness::Thresholds` field of the same name.
pub struct StalenessConfig {
    /// Threshold, in months, applied to investigations.
    pub investigation_months: u32,
    /// Threshold, in months, applied to trials.
    pub trial_months: u32,
    /// Threshold, in months, applied to appeals.
    pub appeal_months: u32,
}

impl Default for StalenessConfig {
    /// Defaults: 6 months for investigations, 12 for trials, 12 for appeals.
    fn default() -> Self {
        let investigation_months = 6;
        let trial_months = 12;
        let appeal_months = 12;
        Self {
            investigation_months,
            trial_months,
            appeal_months,
        }
    }
}
/// Runs the staleness check over every case file and prints the findings to stderr.
///
/// Files that cannot be read or parsed are reported and skipped; they do not affect
/// the exit code. Findings are printed sorted by severity, followed by a summary line
/// that counts all resolved case files, skipped ones included.
///
/// Error codes from the registry / case-file loaders are returned unchanged, and an
/// empty case list returns `1`. Otherwise returns `1` if any finding has severity
/// `Error`, and `0` if none does.
pub fn check_staleness(path: Option<&str>, root: Option<&str>, config: &StalenessConfig) -> i32 {
    let content_root = resolve_content_root(path, root);
    let reg = match load_registry(&content_root) {
        Ok(r) => r,
        Err(code) => return code,
    };
    let case_files = match resolve_case_files(path, &content_root) {
        Ok(f) => f,
        Err(code) => return code,
    };
    if case_files.is_empty() {
        eprintln!("no case files found");
        return 1;
    }

    let thresholds = staleness::Thresholds {
        investigation_months: config.investigation_months,
        trial_months: config.trial_months,
        appeal_months: config.appeal_months,
    };
    // One reference date for the whole run, so every case is judged against the same day.
    let now = chrono_today();

    let mut all_findings: Vec<(String, staleness::Finding)> = Vec::new();
    for case_path in &case_files {
        let content = match std::fs::read_to_string(case_path) {
            Ok(c) => c,
            Err(e) => {
                eprintln!("{case_path}: error reading file: {e}");
                continue;
            }
        };
        let (case, entities, _rels) = match parse_full(&content, Some(&reg)) {
            Ok(result) => result,
            Err(errors) => {
                for err in &errors {
                    eprintln!("{case_path}:{err}");
                }
                continue;
            }
        };
        let findings = staleness::check_case(&case, &entities, &thresholds, now);
        for finding in findings {
            all_findings.push((case_path.clone(), finding));
        }
    }

    all_findings.sort_by_key(|a| a.1.severity);

    let mut errors = 0u32;
    let mut warnings = 0u32;
    let mut infos = 0u32;
    for (path, finding) in &all_findings {
        eprintln!("{}: {path}: {}", finding.severity, finding.message);
        match finding.severity {
            staleness::Severity::Error => errors += 1,
            staleness::Severity::Warning => warnings += 1,
            staleness::Severity::Info => infos += 1,
        }
    }
    eprintln!(
        "staleness: {errors} error(s), {warnings} warning(s), {infos} info(s) across {} case(s)",
        case_files.len()
    );
    i32::from(errors > 0)
}
/// Returns today's date as `(year, month, day)`, derived from the system clock.
///
/// The date is computed from whole days elapsed since the Unix epoch, with no
/// time-zone adjustment. A clock set before the epoch is treated as the epoch itself.
fn chrono_today() -> (i32, u32, u32) {
    const SECS_PER_DAY: u64 = 86400;

    let elapsed = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap_or_default();
    days_to_date(elapsed.as_secs() / SECS_PER_DAY)
}
// The narrowing casts below are bounded by the calendar arithmetic (see comments).
#[allow(
    clippy::cast_possible_truncation,
    clippy::cast_sign_loss,
    clippy::cast_possible_wrap
)]
/// Converts a count of days since 1970-01-01 into a `(year, month, day)` date in the
/// proleptic Gregorian calendar.
///
/// `days` values above `u32::MAX` are clamped to `u32::MAX`. The arithmetic works on
/// 400-year eras of 146 097 days, with years counted from 1 March so that the leap
/// day falls at the end of the year.
fn days_to_date(days: u64) -> (i32, u32, u32) {
    const DAYS_PER_ERA: i64 = 146_097;

    let clamped = u32::try_from(days).unwrap_or(u32::MAX);
    // Shift the origin from 1970-01-01 to 0000-03-01.
    let shifted = i64::from(clamped) + 719_468;
    let era = (if shifted >= 0 {
        shifted
    } else {
        shifted - (DAYS_PER_ERA - 1)
    }) / DAYS_PER_ERA;

    // Always in 0..DAYS_PER_ERA, so the cast cannot truncate.
    let day_of_era = (shifted - era * DAYS_PER_ERA) as u32;
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);

    // Month index counted from March (0 = March, ..., 11 = February).
    let march_month = (5 * day_of_year + 2) / 153;
    let day = day_of_year - (153 * march_month + 2) / 5 + 1;
    let month = if march_month < 10 {
        march_month + 3
    } else {
        march_month - 9
    };

    // January and February belong to the following civil year.
    let march_year = (i64::from(year_of_era) + era * 400) as i32;
    let year = march_year + i32::from(month <= 2);
    (year, month, day)
}
// Unit tests for the helpers in this file that need no registry or case files.
#[cfg(test)]
mod tests {
use super::*;
// Day 0 is the Unix epoch itself.
#[test]
fn days_to_date_epoch() {
assert_eq!(days_to_date(0), (1970, 1, 1));
}
// 20089 days after the epoch is 2025-01-01.
#[test]
fn days_to_date_known() {
assert_eq!(days_to_date(20089), (2025, 1, 1));
}
// A content root without a readable redirects.yaml yields no slugs.
#[test]
fn redirect_slugs_empty_on_missing_file() {
let slugs = load_redirect_slugs(Path::new("/nonexistent"));
assert!(slugs.is_empty());
}
// Pins the default thresholds: 6 / 12 / 12 months.
#[test]
fn staleness_config_defaults() {
let config = StalenessConfig::default();
assert_eq!(config.investigation_months, 6);
assert_eq!(config.trial_months, 12);
assert_eq!(config.appeal_months, 12);
}
}