Skip to main content

weave_content/
commands.rs

1//! High-level orchestration commands for content operations.
2//!
3//! These functions encapsulate the full pipeline for validate, verify,
4//! and check-staleness operations. Both `weave-content` CLI and `loom-seed`
5//! call into these functions.
6
7use std::collections::{HashMap, HashSet};
8use std::path::Path;
9
10use rayon::prelude::*;
11
12use crate::entity;
13use crate::registry;
14use crate::staleness;
15use crate::tags;
16use crate::verifier;
17use crate::{load_registry, load_tag_registry, parse_full, resolve_case_files, resolve_content_root};
18
19// ── Validate ────────────────────────────────────────────────────────────────
20
21/// Run full validation on content files: parse, check schema, cross-file
22/// duplicate detection, redirect collision checks, qualifier consistency.
23///
24/// When `strict` is true, warnings (e.g. filename mismatches) are treated
25/// as errors and cause a non-zero exit code.
26///
27/// Returns exit code (0 = success).
28pub fn validate(path: Option<&str>, root: Option<&str>, strict: bool) -> i32 {
29    let content_root = resolve_content_root(path, root);
30
31    let redirect_slugs = load_redirect_slugs(&content_root);
32    if !redirect_slugs.is_empty() {
33        eprintln!("redirects: {} entries loaded", redirect_slugs.len());
34        let collision_count = check_redirect_collisions(&redirect_slugs, &content_root);
35        if collision_count > 0 {
36            eprintln!(
37                "error: {collision_count} file(s) at old redirected paths — move them or remove the redirect entry"
38            );
39            return 1;
40        }
41    }
42
43    let reg = match load_registry(&content_root) {
44        Ok(r) => r,
45        Err(code) => return code,
46    };
47
48    let tag_reg = match load_tag_registry(&content_root) {
49        Ok(r) => r,
50        Err(code) => return code,
51    };
52
53    let case_files = match resolve_case_files(path, &content_root) {
54        Ok(f) => f,
55        Err(code) => return code,
56    };
57
58    if case_files.is_empty() {
59        eprintln!("no case files found");
60        return 1;
61    }
62
63    if !reg.is_empty() {
64        eprintln!("registry: {} entities loaded", reg.len());
65    }
66
67    // In strict mode, filename mismatch warnings become errors
68    if strict {
69        let filename_warnings = reg.check_filenames();
70        if !filename_warnings.is_empty() {
71            for w in &filename_warnings {
72                eprintln!("{w}");
73            }
74            eprintln!(
75                "error: {} filename warning(s) treated as errors (--strict)",
76                filename_warnings.len()
77            );
78            return 1;
79        }
80    }
81    if !tag_reg.is_empty() {
82        eprintln!(
83            "tags: {} tags loaded across {} categories",
84            tag_reg.len(),
85            tag_reg.category_slugs().len()
86        );
87    }
88
89    let mut entity_tag_errors = false;
90    for entry in reg.entries() {
91        let tag_errors = tag_reg.validate_tags(&entry.tags, 2);
92        for err in &tag_errors {
93            eprintln!("{}:{err}", entry.path.display());
94        }
95        if !tag_errors.is_empty() {
96            entity_tag_errors = true;
97        }
98    }
99
100    let results: Vec<ValidateResult> = case_files
101        .par_iter()
102        .map(|case_path| validate_single_case(case_path, &reg, &tag_reg))
103        .collect();
104
105    let mut exit_code = i32::from(entity_tag_errors);
106    let mut all_events: Vec<(String, String)> = Vec::new();
107    let mut all_rel_ids: Vec<(String, String, usize)> = Vec::new();
108
109    for result in results {
110        if result.exit_code != 0 {
111            exit_code = result.exit_code;
112        }
113        all_events.extend(result.events);
114        all_rel_ids.extend(result.rel_ids);
115    }
116
117    if let Some(code) = check_duplicate_event_names(&all_events) {
118        exit_code = code;
119    }
120
121    if let Some(code) = check_duplicate_rel_ids(&all_rel_ids) {
122        exit_code = code;
123    }
124
125    check_qualifier_consistency(&reg, strict, &mut exit_code);
126
127    exit_code
128}
129
/// Outcome of validating a single case file.
struct ValidateResult {
    /// Exit code for this file (0 = no problems).
    exit_code: i32,
    /// `(event name, file path)` pairs, fed into cross-file duplicate detection.
    events: Vec<(String, String)>,
    /// `(relationship id, file path, line)` triples, fed into cross-file
    /// duplicate detection.
    rel_ids: Vec<(String, String, usize)>,
}
135
136fn validate_single_case(
137    path: &str,
138    reg: &registry::EntityRegistry,
139    tag_reg: &tags::TagRegistry,
140) -> ValidateResult {
141    let content = match std::fs::read_to_string(path) {
142        Ok(c) => c,
143        Err(e) => {
144            eprintln!("{path}: error reading file: {e}");
145            return ValidateResult {
146                exit_code: 2,
147                events: Vec::new(),
148                rel_ids: Vec::new(),
149            };
150        }
151    };
152
153    match parse_full(&content, Some(reg)) {
154        Ok((case, entities, rels)) => {
155            eprintln!(
156                "{path}: ok -- {id}: {title} ({ent} entities, {rel} relationships, {src} sources)",
157                id = case.id.as_deref().unwrap_or("(no id)"),
158                title = case.title,
159                ent = entities.len(),
160                rel = rels.len(),
161                src = case.sources.len(),
162            );
163            if !case.summary.is_empty() {
164                eprintln!(
165                    "  summary: {}...",
166                    &case.summary[..case.summary.len().min(80)]
167                );
168            }
169            for e in &entities {
170                let id_display = e.id.as_deref().unwrap_or("(no id)");
171                eprintln!(
172                    "  line {}: {id_display} {} ({}, {} fields)",
173                    e.line, e.name, e.label, e.fields.len()
174                );
175            }
176
177            let events: Vec<(String, String)> = entities
178                .iter()
179                .filter(|e| e.label == entity::Label::Event)
180                .map(|e| (e.name.clone(), path.to_string()))
181                .collect();
182
183            for r in &rels {
184                let id_display = r.id.as_deref().unwrap_or("(no id)");
185                eprintln!(
186                    "  line {}: {id_display} {} -> {}: {}",
187                    r.line, r.source_name, r.target_name, r.rel_type,
188                );
189            }
190
191            let mut exit_code = 0;
192            let tag_errors = tag_reg.validate_tags(&case.tags, 2);
193            for err in &tag_errors {
194                eprintln!("{path}:{err}");
195            }
196            if !tag_errors.is_empty() {
197                exit_code = 1;
198            }
199
200            let rel_ids: Vec<(String, String, usize)> = rels
201                .iter()
202                .filter_map(|r| {
203                    r.id.as_ref()
204                        .map(|id| (id.clone(), path.to_string(), r.line))
205                })
206                .collect();
207
208            ValidateResult {
209                exit_code,
210                events,
211                rel_ids,
212            }
213        }
214        Err(errors) => {
215            for err in &errors {
216                eprintln!("{path}:{err}");
217            }
218            ValidateResult {
219                exit_code: 1,
220                events: Vec::new(),
221                rel_ids: Vec::new(),
222            }
223        }
224    }
225}
226
/// Report event names that occur more than once across all case files.
///
/// `all_events` holds `(event name, file path)` pairs. Every occurrence after
/// the first is printed to stderr along with the path of the first one.
///
/// Returns `Some(1)` when at least one duplicate exists, `None` otherwise.
fn check_duplicate_event_names(all_events: &[(String, String)]) -> Option<i32> {
    use std::collections::hash_map::Entry;

    let mut first_seen: HashMap<&str, &str> = HashMap::new();
    let mut duplicate_count = 0usize;

    for (name, path) in all_events {
        match first_seen.entry(name.as_str()) {
            Entry::Occupied(slot) => {
                let first_path = *slot.get();
                eprintln!(
                    "error: duplicate event name {name:?} in {path} (first defined in {first_path})"
                );
                duplicate_count += 1;
            }
            Entry::Vacant(slot) => {
                slot.insert(path.as_str());
            }
        }
    }

    (duplicate_count > 0).then_some(1)
}
244
/// Report relationship ids that occur more than once across all case files.
///
/// `all_rel_ids` holds `(id, file path, line)` triples. Every occurrence
/// after the first is printed to stderr along with the location of the first.
///
/// Returns `Some(1)` when at least one duplicate exists, `None` otherwise.
fn check_duplicate_rel_ids(all_rel_ids: &[(String, String, usize)]) -> Option<i32> {
    use std::collections::hash_map::Entry;

    let mut first_seen: HashMap<&str, (&str, usize)> = HashMap::new();
    let mut duplicate_count = 0usize;

    for (id, path, line) in all_rel_ids {
        match first_seen.entry(id.as_str()) {
            Entry::Occupied(slot) => {
                let (first_path, first_line) = *slot.get();
                eprintln!(
                    "error: duplicate relationship id {id:?} at {path}:{line} (first defined at {first_path}:{first_line})"
                );
                duplicate_count += 1;
            }
            Entry::Vacant(slot) => {
                slot.insert((path.as_str(), *line));
            }
        }
    }

    (duplicate_count > 0).then_some(1)
}
262
263fn check_qualifier_consistency(reg: &registry::EntityRegistry, strict: bool, exit_code: &mut i32) {
264    use entity::FieldValue;
265
266    let mut by_lower: HashMap<String, Vec<(String, String)>> = HashMap::new();
267
268    for entry in reg.entries() {
269        let qualifier = entry
270            .entity
271            .fields
272            .iter()
273            .find(|(k, _)| k == "qualifier")
274            .and_then(|(_, v)| match v {
275                FieldValue::Single(s) => Some(s.as_str()),
276                FieldValue::List(_) => None,
277            });
278
279        if let Some(q) = qualifier {
280            by_lower
281                .entry(q.to_lowercase())
282                .or_default()
283                .push((q.to_string(), entry.path.display().to_string()));
284        }
285    }
286
287    let mut inconsistencies = 0usize;
288    for occurrences in by_lower.values() {
289        let first = &occurrences[0].0;
290        let inconsistent: Vec<_> = occurrences.iter().filter(|(q, _)| q != first).collect();
291        if !inconsistent.is_empty() {
292            inconsistencies += 1;
293            eprintln!(
294                "warning: inconsistent qualifier casing for {:?}:",
295                occurrences[0].0
296            );
297            for (q, path) in occurrences {
298                eprintln!("  {path}: {q:?}");
299            }
300        }
301    }
302
303    if strict && inconsistencies > 0 {
304        eprintln!(
305            "error: {inconsistencies} qualifier consistency warning(s) treated as errors (--strict)"
306        );
307        *exit_code = 1;
308    }
309}
310
/// Collect the `from:` slugs listed in `redirects.yaml` under the content root.
///
/// Only lines that start with exactly `  - from: ` are recognised; the rest
/// of such a line, trimmed, becomes a slug. A missing or unreadable file
/// yields an empty set.
fn load_redirect_slugs(content_root: &Path) -> HashSet<String> {
    match std::fs::read_to_string(content_root.join("redirects.yaml")) {
        Ok(yaml) => yaml
            .lines()
            .filter_map(|line| line.strip_prefix("  - from: "))
            .map(|slug| slug.trim().to_string())
            .collect(),
        Err(_) => HashSet::new(),
    }
}
326
/// Count redirect slugs for which `<content_root>/<slug>.md` still exists.
///
/// Each such file is reported on stderr. Returns the number of collisions,
/// which is 0 for an empty slug set.
fn check_redirect_collisions(redirect_slugs: &HashSet<String>, content_root: &Path) -> usize {
    let root_str = content_root.to_string_lossy();
    let mut collisions = 0;

    let still_present = redirect_slugs
        .iter()
        .filter(|candidate| content_root.join(format!("{candidate}.md")).exists());

    for slug in still_present {
        eprintln!("{root_str}/{slug}.md: error: file at old redirected path (see redirects.yaml)");
        collisions += 1;
    }

    collisions
}
347
348// ── Verify ──────────────────────────────────────────────────────────────────
349
/// Configuration for URL verification.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so callers can log, copy
/// and compare configurations.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct VerifyConfig {
    /// Maximum concurrent requests.
    pub concurrency: usize,
    /// Per-URL timeout in seconds.
    pub timeout: u64,
    /// Path to URL verification cache file; `None` disables caching.
    pub cache_path: Option<String>,
    /// Report all as warnings, never fail.
    pub warn_only: bool,
}
361
362/// Run URL verification on content files.
363///
364/// Returns exit code (0 = success).
365pub fn verify(path: Option<&str>, root: Option<&str>, config: &VerifyConfig) -> i32 {
366    let content_root = resolve_content_root(path, root);
367    let reg = match load_registry(&content_root) {
368        Ok(r) => r,
369        Err(code) => return code,
370    };
371
372    let case_files = match resolve_case_files(path, &content_root) {
373        Ok(f) => f,
374        Err(code) => return code,
375    };
376
377    if case_files.is_empty() {
378        eprintln!("no case files found");
379        return 1;
380    }
381
382    let mut exit_code = 0;
383    for case_path in &case_files {
384        let result = verify_single_case(case_path, &reg, config);
385        if result != 0 {
386            exit_code = result;
387        }
388    }
389
390    let reg_result = verify_registry_thumbnails(&reg, config);
391    if reg_result != 0 {
392        exit_code = reg_result;
393    }
394
395    exit_code
396}
397
398fn verify_single_case(
399    path: &str,
400    reg: &registry::EntityRegistry,
401    config: &VerifyConfig,
402) -> i32 {
403    let content = match std::fs::read_to_string(path) {
404        Ok(c) => c,
405        Err(e) => {
406            eprintln!("{path}: error reading file: {e}");
407            return 2;
408        }
409    };
410
411    let (case, entities, rels) = match parse_full(&content, Some(reg)) {
412        Ok(result) => result,
413        Err(errors) => {
414            for err in &errors {
415                eprintln!("{path}:{err}");
416            }
417            return 1;
418        }
419    };
420
421    let mut collect_errors = Vec::new();
422    let urls = verifier::collect_urls(&case.sources, &entities, &rels, &mut collect_errors);
423
424    if !collect_errors.is_empty() {
425        for err in &collect_errors {
426            eprintln!("{path}:{err}");
427        }
428        return 1;
429    }
430
431    if urls.is_empty() {
432        eprintln!("{path}: no URLs to verify");
433        return 0;
434    }
435
436    run_url_verification(path, &urls, config)
437}
438
439fn verify_registry_thumbnails(
440    reg: &registry::EntityRegistry,
441    config: &VerifyConfig,
442) -> i32 {
443    let urls = verifier::collect_registry_urls(reg);
444    if urls.is_empty() {
445        return 0;
446    }
447    run_url_verification("(registry)", &urls, config)
448}
449
450fn run_url_verification(
451    label: &str,
452    urls: &[verifier::UrlEntry],
453    config: &VerifyConfig,
454) -> i32 {
455    let mut verify_cache = load_verify_cache(label, config.cache_path.as_deref());
456
457    let (cached_results, urls_to_check) = partition_cached(urls, verify_cache.as_ref());
458    let check_count = urls_to_check.len();
459    let cached_count = cached_results.len();
460
461    if cached_count > 0 {
462        eprintln!(
463            "{label}: {cached_count} cached, {check_count} to check \
464             (concurrency={}, timeout={}s)",
465            config.concurrency, config.timeout
466        );
467    } else {
468        eprintln!(
469            "{label}: verifying {check_count} URLs \
470             (concurrency={}, timeout={}s)",
471            config.concurrency, config.timeout
472        );
473    }
474
475    let fresh_results = if urls_to_check.is_empty() {
476        Vec::new()
477    } else {
478        let rt = match tokio::runtime::Builder::new_current_thread()
479            .enable_all()
480            .build()
481        {
482            Ok(rt) => rt,
483            Err(e) => {
484                eprintln!("{label}: failed to create async runtime: {e}");
485                return 2;
486            }
487        };
488        rt.block_on(verifier::verify_urls(
489            urls_to_check,
490            config.concurrency,
491            config.timeout,
492        ))
493    };
494
495    if let Some(ref mut vc) = verify_cache {
496        for check in &fresh_results {
497            vc.put(&check.url, check.status, check.detail.as_deref());
498        }
499    }
500
501    let mut all_results = cached_results;
502    all_results.extend(fresh_results);
503
504    let has_error = print_verification_results(label, &all_results);
505
506    if let Some(ref vc) = verify_cache
507        && let Err(e) = vc.save()
508    {
509        eprintln!("{label}: cache save warning: {e}");
510    }
511
512    i32::from(has_error && !config.warn_only)
513}
514
515fn load_verify_cache(
516    label: &str,
517    cache_path: Option<&str>,
518) -> Option<crate::cache::VerifyCache> {
519    cache_path.map(|p| match crate::cache::VerifyCache::load(p) {
520        Ok(c) => {
521            eprintln!("{label}: using cache {p}");
522            c
523        }
524        Err(e) => {
525            eprintln!("{label}: cache load warning: {e}");
526            crate::cache::VerifyCache::load("/dev/null")
527                .unwrap_or_else(|_| crate::cache::VerifyCache::empty())
528        }
529    })
530}
531
532fn print_verification_results(label: &str, results: &[verifier::UrlCheck]) -> bool {
533    let mut has_error = false;
534
535    for check in results {
536        let detail = check.detail.as_deref().unwrap_or("");
537        match check.status {
538            verifier::CheckStatus::Ok => {
539                eprintln!(
540                    "  ok  {}{}",
541                    check.url,
542                    if check.is_thumbnail {
543                        " [thumbnail]"
544                    } else {
545                        ""
546                    }
547                );
548            }
549            verifier::CheckStatus::Warn => {
550                eprintln!("  warn  {} -- {detail}", check.url);
551            }
552            verifier::CheckStatus::Error => {
553                has_error = true;
554                eprintln!("  ERROR {} -- {detail}", check.url);
555            }
556        }
557    }
558
559    let ok_count = results
560        .iter()
561        .filter(|c| c.status == verifier::CheckStatus::Ok)
562        .count();
563    let warn_count = results
564        .iter()
565        .filter(|c| c.status == verifier::CheckStatus::Warn)
566        .count();
567    let err_count = results
568        .iter()
569        .filter(|c| c.status == verifier::CheckStatus::Error)
570        .count();
571
572    eprintln!("{label}: {ok_count} ok, {warn_count} warn, {err_count} error");
573
574    has_error
575}
576
577fn partition_cached(
578    urls: &[verifier::UrlEntry],
579    verify_cache: Option<&crate::cache::VerifyCache>,
580) -> (Vec<verifier::UrlCheck>, Vec<verifier::UrlEntry>) {
581    let Some(vc) = verify_cache else {
582        return (Vec::new(), urls.to_vec());
583    };
584
585    let mut cached = Vec::new();
586    let mut uncached = Vec::new();
587
588    for entry in urls {
589        if let Some(cache_entry) = vc.get(entry.url()) {
590            let status = match cache_entry.status.as_str() {
591                "ok" => verifier::CheckStatus::Ok,
592                "warn" => verifier::CheckStatus::Warn,
593                _ => verifier::CheckStatus::Error,
594            };
595            cached.push(verifier::UrlCheck {
596                url: entry.url().to_string(),
597                status,
598                detail: cache_entry.detail.clone(),
599                is_thumbnail: entry.is_thumbnail(),
600            });
601        } else {
602            uncached.push(entry.clone());
603        }
604    }
605
606    (cached, uncached)
607}
608
609// ── Check Staleness ─────────────────────────────────────────────────────────
610
/// Configuration for staleness checking.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so callers can log, copy
/// and compare configurations.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct StalenessConfig {
    /// Months before an `under_investigation` case is considered stale.
    pub investigation_months: u32,
    /// Months before a `trial` case is considered stale.
    pub trial_months: u32,
    /// Months before an `appeal` case is considered stale.
    pub appeal_months: u32,
}

impl Default for StalenessConfig {
    /// Defaults: 6 months for investigations, 12 months for trials and appeals.
    fn default() -> Self {
        Self {
            investigation_months: 6,
            trial_months: 12,
            appeal_months: 12,
        }
    }
}
630
631/// Run staleness checks on content files.
632///
633/// Returns exit code (0 = success, 1 = errors found).
634pub fn check_staleness(path: Option<&str>, root: Option<&str>, config: &StalenessConfig) -> i32 {
635    let content_root = resolve_content_root(path, root);
636    let reg = match load_registry(&content_root) {
637        Ok(r) => r,
638        Err(code) => return code,
639    };
640
641    let case_files = match resolve_case_files(path, &content_root) {
642        Ok(f) => f,
643        Err(code) => return code,
644    };
645
646    if case_files.is_empty() {
647        eprintln!("no case files found");
648        return 1;
649    }
650
651    let thresholds = staleness::Thresholds {
652        investigation_months: config.investigation_months,
653        trial_months: config.trial_months,
654        appeal_months: config.appeal_months,
655    };
656
657    let now = chrono_today();
658
659    let mut all_findings: Vec<(String, staleness::Finding)> = Vec::new();
660
661    for case_path in &case_files {
662        let content = match std::fs::read_to_string(case_path) {
663            Ok(c) => c,
664            Err(e) => {
665                eprintln!("{case_path}: error reading file: {e}");
666                continue;
667            }
668        };
669
670        let (case, entities, _rels) = match parse_full(&content, Some(&reg)) {
671            Ok(result) => result,
672            Err(errors) => {
673                for err in &errors {
674                    eprintln!("{case_path}:{err}");
675                }
676                continue;
677            }
678        };
679
680        let findings = staleness::check_case(&case, &entities, &thresholds, now);
681        for finding in findings {
682            all_findings.push((case_path.clone(), finding));
683        }
684    }
685
686    all_findings.sort_by_key(|a| a.1.severity);
687
688    let mut errors = 0u32;
689    let mut warnings = 0u32;
690    let mut infos = 0u32;
691
692    for (path, finding) in &all_findings {
693        eprintln!("{}: {path}: {}", finding.severity, finding.message);
694        match finding.severity {
695            staleness::Severity::Error => errors += 1,
696            staleness::Severity::Warning => warnings += 1,
697            staleness::Severity::Info => infos += 1,
698        }
699    }
700
701    eprintln!(
702        "staleness: {errors} error(s), {warnings} warning(s), {infos} info(s) across {} case(s)",
703        case_files.len()
704    );
705
706    i32::from(errors > 0)
707}
708
709/// Get today's date as (year, month, day).
710fn chrono_today() -> (i32, u32, u32) {
711    let now = std::time::SystemTime::now();
712    let since_epoch = now
713        .duration_since(std::time::UNIX_EPOCH)
714        .unwrap_or_default();
715    let days = since_epoch.as_secs() / 86400;
716    days_to_date(days)
717}
718
/// Convert a count of days since the Unix epoch into `(year, month, day)`.
///
/// Uses the civil-from-days algorithm from Howard Hinnant's date library.
/// Inputs larger than `u32::MAX` days are clamped to `u32::MAX`.
// The narrowing casts at the end cannot lose information: after clamping the
// year is at most about 11.8 million, the month is 1..=12, the day is 1..=31.
#[allow(
    clippy::cast_possible_truncation,
    clippy::cast_sign_loss,
    clippy::cast_possible_wrap
)]
fn days_to_date(days: u64) -> (i32, u32, u32) {
    const DAYS_PER_ERA: u64 = 146_097; // one 400-year cycle
    const EPOCH_SHIFT: u64 = 719_468; // days from 0000-03-01 to 1970-01-01

    let shifted = days.min(u64::from(u32::MAX)) + EPOCH_SHIFT;
    let era = shifted / DAYS_PER_ERA;
    let day_of_era = shifted % DAYS_PER_ERA;

    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);

    // The algorithm's internal year starts in March: index 0 = March,
    // index 11 = February of the following calendar year.
    let month_index = (5 * day_of_year + 2) / 153;
    let day = day_of_year - (153 * month_index + 2) / 5 + 1;

    let (month, year_carry) = if month_index < 10 {
        (month_index + 3, 0)
    } else {
        (month_index - 9, 1)
    };
    let year = year_of_era + era * 400 + year_carry;

    (year as i32, month as u32, day as u32)
}
739
#[cfg(test)]
mod tests {
    use super::*;

    /// Day zero is the Unix epoch itself.
    #[test]
    fn days_to_date_epoch() {
        let (year, month, day) = days_to_date(0);
        assert_eq!((year, month, day), (1970, 1, 1));
    }

    /// 2025-01-01 is day 20089 after the epoch.
    #[test]
    fn days_to_date_known() {
        let new_year_2025 = days_to_date(20089);
        assert_eq!(new_year_2025, (2025, 1, 1));
    }

    /// A content root without `redirects.yaml` yields no slugs.
    #[test]
    fn redirect_slugs_empty_on_missing_file() {
        let missing_root = Path::new("/nonexistent");
        assert!(load_redirect_slugs(missing_root).is_empty());
    }

    /// The default thresholds are 6 / 12 / 12 months.
    #[test]
    fn staleness_config_defaults() {
        let StalenessConfig {
            investigation_months,
            trial_months,
            appeal_months,
        } = StalenessConfig::default();
        assert_eq!(investigation_months, 6);
        assert_eq!(trial_months, 12);
        assert_eq!(appeal_months, 12);
    }
}