Skip to main content

weave_content/
commands.rs

1//! High-level orchestration commands for content operations.
2//!
3//! These functions encapsulate the full pipeline for validate, verify,
4//! and check-staleness operations. Both `weave-content` CLI and `loom-seed`
5//! call into these functions.
6
7use std::collections::{HashMap, HashSet};
8use std::path::Path;
9
10use rayon::prelude::*;
11
12use crate::entity;
13use crate::registry;
14use crate::staleness;
15use crate::tags;
16use crate::verifier;
17use crate::{load_registry, load_tag_registry, parse_full, resolve_case_files, resolve_content_root};
18
19// ── Validate ────────────────────────────────────────────────────────────────
20
21/// Run full validation on content files: parse, check schema, cross-file
22/// duplicate detection, redirect collision checks, qualifier consistency.
23///
24/// When `strict` is true, warnings (e.g. filename mismatches) are treated
25/// as errors and cause a non-zero exit code.
26///
27/// When `quiet` is true, per-entity and per-relationship detail lines are
28/// suppressed. Only file-level summaries and errors/warnings are shown.
29///
30/// Returns exit code (0 = success).
31pub fn validate(path: Option<&str>, root: Option<&str>, strict: bool, quiet: bool) -> i32 {
32    let content_root = resolve_content_root(path, root);
33
34    let redirect_slugs = load_redirect_slugs(&content_root);
35    if !redirect_slugs.is_empty() {
36        eprintln!("redirects: {} entries loaded", redirect_slugs.len());
37        let collision_count = check_redirect_collisions(&redirect_slugs, &content_root);
38        if collision_count > 0 {
39            eprintln!(
40                "error: {collision_count} file(s) at old redirected paths — move them or remove the redirect entry"
41            );
42            return 1;
43        }
44    }
45
46    let reg = match load_registry(&content_root) {
47        Ok(r) => r,
48        Err(code) => return code,
49    };
50
51    let tag_reg = match load_tag_registry(&content_root) {
52        Ok(r) => r,
53        Err(code) => return code,
54    };
55
56    let case_files = match resolve_case_files(path, &content_root) {
57        Ok(f) => f,
58        Err(code) => return code,
59    };
60
61    if case_files.is_empty() {
62        eprintln!("no case files found");
63        return 1;
64    }
65
66    if !reg.is_empty() {
67        eprintln!("registry: {} entities loaded", reg.len());
68    }
69
70    // In strict mode, filename mismatch warnings become errors
71    if strict {
72        let filename_warnings = reg.check_filenames();
73        if !filename_warnings.is_empty() {
74            for w in &filename_warnings {
75                eprintln!("{w}");
76            }
77            eprintln!(
78                "error: {} filename warning(s) treated as errors (--strict)",
79                filename_warnings.len()
80            );
81            return 1;
82        }
83    }
84    if !tag_reg.is_empty() {
85        eprintln!(
86            "tags: {} tags loaded across {} categories",
87            tag_reg.len(),
88            tag_reg.category_slugs().len()
89        );
90    }
91
92    let mut entity_tag_errors = false;
93    for entry in reg.entries() {
94        let tag_errors = tag_reg.validate_tags(&entry.tags, 2);
95        for err in &tag_errors {
96            eprintln!("{}:{err}", entry.path.display());
97        }
98        if !tag_errors.is_empty() {
99            entity_tag_errors = true;
100        }
101    }
102
103    let results: Vec<ValidateResult> = case_files
104        .par_iter()
105        .map(|case_path| validate_single_case(case_path, &reg, &tag_reg, quiet))
106        .collect();
107
108    let mut exit_code = i32::from(entity_tag_errors);
109    let mut all_events: Vec<(String, String)> = Vec::new();
110    let mut all_rel_ids: Vec<(String, String, usize)> = Vec::new();
111
112    for result in &results {
113        if result.exit_code != 0 {
114            exit_code = result.exit_code;
115        }
116        all_events.extend(result.events.iter().cloned());
117        all_rel_ids.extend(result.rel_ids.iter().cloned());
118    }
119
120    if let Some(code) = check_duplicate_event_names(&all_events) {
121        exit_code = code;
122    }
123
124    if let Some(code) = check_duplicate_rel_ids(&all_rel_ids) {
125        exit_code = code;
126    }
127
128    check_qualifier_consistency(&reg, strict, &mut exit_code);
129
130    // Print summary
131    let ok_count = results.iter().filter(|r| r.exit_code == 0).count();
132    let err_count = results.iter().filter(|r| r.exit_code != 0).count();
133    let total_entities: usize = results.iter().map(|r| r.entity_count).sum();
134    let total_rels: usize = results.iter().map(|r| r.rel_count).sum();
135    eprintln!(
136        "validate: {} case(s) ok, {} failed ({} entities, {} relationships)",
137        ok_count, err_count, total_entities, total_rels
138    );
139
140    exit_code
141}
142
/// Outcome of validating a single case file.
struct ValidateResult {
    /// Exit code for this file (0 = ok).
    exit_code: i32,
    /// `(event name, case file path)` pairs found in the file.
    events: Vec<(String, String)>,
    /// `(relationship id, case file path, line)` triples for relationships that carry an id.
    rel_ids: Vec<(String, String, usize)>,
    /// Number of entities parsed from the file.
    entity_count: usize,
    /// Number of relationships parsed from the file.
    rel_count: usize,
}
150
151fn validate_single_case(
152    path: &str,
153    reg: &registry::EntityRegistry,
154    tag_reg: &tags::TagRegistry,
155    quiet: bool,
156) -> ValidateResult {
157    let content = match std::fs::read_to_string(path) {
158        Ok(c) => c,
159        Err(e) => {
160            eprintln!("{path}: error reading file: {e}");
161            return ValidateResult {
162                exit_code: 2,
163                events: Vec::new(),
164                rel_ids: Vec::new(),
165                entity_count: 0,
166                rel_count: 0,
167            };
168        }
169    };
170
171    match parse_full(&content, Some(reg)) {
172        Ok((case, entities, rels)) => {
173            eprintln!(
174                "{path}: ok -- {id}: {title} ({ent} entities, {rel} relationships, {src} sources)",
175                id = case.id.as_deref().unwrap_or("(no id)"),
176                title = case.title,
177                ent = entities.len(),
178                rel = rels.len(),
179                src = case.sources.len(),
180            );
181            if !quiet {
182                if !case.summary.is_empty() {
183                    eprintln!(
184                        "  summary: {}...",
185                        &case.summary[..case.summary.len().min(80)]
186                    );
187                }
188                for e in &entities {
189                    let id_display = e.id.as_deref().unwrap_or("(no id)");
190                    eprintln!(
191                        "  line {}: {id_display} {} ({}, {} fields)",
192                        e.line, e.name, e.label, e.fields.len()
193                    );
194                }
195            }
196
197            let events: Vec<(String, String)> = entities
198                .iter()
199                .filter(|e| e.label == entity::Label::Event)
200                .map(|e| (e.name.clone(), path.to_string()))
201                .collect();
202
203            for r in &rels {
204                if !quiet {
205                    let id_display = r.id.as_deref().unwrap_or("(no id)");
206                    eprintln!(
207                        "  line {}: {id_display} {} -> {}: {}",
208                        r.line, r.source_name, r.target_name, r.rel_type,
209                    );
210                }
211            }
212
213            let mut exit_code = 0;
214            let tag_errors = tag_reg.validate_tags(&case.tags, 2);
215            for err in &tag_errors {
216                eprintln!("{path}:{err}");
217            }
218            if !tag_errors.is_empty() {
219                exit_code = 1;
220            }
221
222            let rel_ids: Vec<(String, String, usize)> = rels
223                .iter()
224                .filter_map(|r| {
225                    r.id.as_ref()
226                        .map(|id| (id.clone(), path.to_string(), r.line))
227                })
228                .collect();
229
230            ValidateResult {
231                exit_code,
232                events,
233                rel_ids,
234                entity_count: entities.len(),
235                rel_count: rels.len(),
236            }
237        }
238        Err(errors) => {
239            for err in &errors {
240                eprintln!("{path}:{err}");
241            }
242            ValidateResult {
243                exit_code: 1,
244                events: Vec::new(),
245                rel_ids: Vec::new(),
246                entity_count: 0,
247                rel_count: 0,
248            }
249        }
250    }
251}
252
/// Report event names that occur more than once across case files.
///
/// `all_events` holds `(event name, case file path)` pairs. Every repeat of a
/// name after its first occurrence is printed to stderr together with the
/// path where the name was first seen.
///
/// Returns `Some(1)` if at least one duplicate was found, otherwise `None`.
fn check_duplicate_event_names(all_events: &[(String, String)]) -> Option<i32> {
    use std::collections::hash_map::Entry;

    let mut first_seen: HashMap<&str, &str> = HashMap::new();
    let mut has_duplicates = false;

    for (name, path) in all_events {
        match first_seen.entry(name.as_str()) {
            Entry::Vacant(slot) => {
                slot.insert(path.as_str());
            }
            Entry::Occupied(slot) => {
                let first_path = *slot.get();
                eprintln!(
                    "error: duplicate event name {name:?} in {path} (first defined in {first_path})"
                );
                has_duplicates = true;
            }
        }
    }

    has_duplicates.then_some(1)
}
270
/// Report relationship ids that occur more than once across case files.
///
/// `all_rel_ids` holds `(relationship id, case file path, line)` triples.
/// Every repeat of an id after its first occurrence is printed to stderr with
/// both locations.
///
/// Returns `Some(1)` if at least one duplicate was found, otherwise `None`.
fn check_duplicate_rel_ids(all_rel_ids: &[(String, String, usize)]) -> Option<i32> {
    use std::collections::hash_map::Entry;

    let mut first_seen: HashMap<&str, (&str, usize)> = HashMap::new();
    let mut has_duplicates = false;

    for (id, path, line) in all_rel_ids {
        match first_seen.entry(id.as_str()) {
            Entry::Vacant(slot) => {
                slot.insert((path.as_str(), *line));
            }
            Entry::Occupied(slot) => {
                let (first_path, first_line) = *slot.get();
                eprintln!(
                    "error: duplicate relationship id {id:?} at {path}:{line} (first defined at {first_path}:{first_line})"
                );
                has_duplicates = true;
            }
        }
    }

    has_duplicates.then_some(1)
}
288
289fn check_qualifier_consistency(reg: &registry::EntityRegistry, strict: bool, exit_code: &mut i32) {
290    use entity::FieldValue;
291
292    let mut by_lower: HashMap<String, Vec<(String, String)>> = HashMap::new();
293
294    for entry in reg.entries() {
295        let qualifier = entry
296            .entity
297            .fields
298            .iter()
299            .find(|(k, _)| k == "qualifier")
300            .and_then(|(_, v)| match v {
301                FieldValue::Single(s) => Some(s.as_str()),
302                FieldValue::List(_) => None,
303            });
304
305        if let Some(q) = qualifier {
306            by_lower
307                .entry(q.to_lowercase())
308                .or_default()
309                .push((q.to_string(), entry.path.display().to_string()));
310        }
311    }
312
313    let mut inconsistencies = 0usize;
314    for occurrences in by_lower.values() {
315        let first = &occurrences[0].0;
316        let inconsistent: Vec<_> = occurrences.iter().filter(|(q, _)| q != first).collect();
317        if !inconsistent.is_empty() {
318            inconsistencies += 1;
319            eprintln!(
320                "warning: inconsistent qualifier casing for {:?}:",
321                occurrences[0].0
322            );
323            for (q, path) in occurrences {
324                eprintln!("  {path}: {q:?}");
325            }
326        }
327    }
328
329    if strict && inconsistencies > 0 {
330        eprintln!(
331            "error: {inconsistencies} qualifier consistency warning(s) treated as errors (--strict)"
332        );
333        *exit_code = 1;
334    }
335}
336
/// Load redirect `from:` slugs from `redirects.yaml` in the content root.
///
/// The file is scanned line by line (no YAML parser): only lines starting
/// with exactly `  - from: ` contribute a slug. The value is trimmed, one
/// pair of matching surrounding quotes (`"` or `'`) is removed, and empty
/// values are skipped.
///
/// A missing file yields an empty set silently. Any other read error also
/// yields an empty set but is reported on stderr, since it means redirect
/// collision checks cannot run.
fn load_redirect_slugs(content_root: &Path) -> HashSet<String> {
    let path = content_root.join("redirects.yaml");
    let content = match std::fs::read_to_string(&path) {
        Ok(text) => text,
        Err(e) => {
            if e.kind() != std::io::ErrorKind::NotFound {
                eprintln!("{}: warning: could not read redirects: {e}", path.display());
            }
            return HashSet::new();
        }
    };

    content
        .lines()
        .filter_map(|line| line.strip_prefix("  - from: "))
        .map(|raw| {
            let value = raw.trim();
            // YAML quoting is not part of the slug itself.
            value
                .strip_prefix('"')
                .and_then(|rest| rest.strip_suffix('"'))
                .or_else(|| {
                    value
                        .strip_prefix('\'')
                        .and_then(|rest| rest.strip_suffix('\''))
                })
                .unwrap_or(value)
        })
        .filter(|slug| !slug.is_empty())
        .map(str::to_string)
        .collect()
}
352
/// Count redirect slugs for which `<content_root>/<slug>.md` still exists.
///
/// Each such file is reported on stderr. Returns the number of files found
/// (0 for an empty slug set).
fn check_redirect_collisions(redirect_slugs: &HashSet<String>, content_root: &Path) -> usize {
    let root_str = content_root.to_string_lossy();

    redirect_slugs
        .iter()
        .filter(|slug| {
            let occupied = content_root.join(format!("{slug}.md")).exists();
            if occupied {
                eprintln!(
                    "{root_str}/{slug}.md: error: file at old redirected path (see redirects.yaml)"
                );
            }
            occupied
        })
        .count()
}
373
374// ── Verify ──────────────────────────────────────────────────────────────────
375
/// Configuration for URL verification.
///
/// Derives `Debug` and `Clone` so callers can log the configuration and hand
/// out copies of it.
#[derive(Debug, Clone)]
pub struct VerifyConfig {
    /// Maximum concurrent requests.
    pub concurrency: usize,
    /// Per-URL timeout in seconds.
    pub timeout: u64,
    /// Path to URL verification cache file; `None` runs without a cache.
    pub cache_path: Option<String>,
    /// Report all as warnings, never fail.
    pub warn_only: bool,
}
387
388/// Run URL verification on content files.
389///
390/// Returns exit code (0 = success).
391pub fn verify(path: Option<&str>, root: Option<&str>, config: &VerifyConfig) -> i32 {
392    let content_root = resolve_content_root(path, root);
393    let reg = match load_registry(&content_root) {
394        Ok(r) => r,
395        Err(code) => return code,
396    };
397
398    let case_files = match resolve_case_files(path, &content_root) {
399        Ok(f) => f,
400        Err(code) => return code,
401    };
402
403    if case_files.is_empty() {
404        eprintln!("no case files found");
405        return 1;
406    }
407
408    let mut exit_code = 0;
409    for case_path in &case_files {
410        let result = verify_single_case(case_path, &reg, config);
411        if result != 0 {
412            exit_code = result;
413        }
414    }
415
416    let reg_result = verify_registry_thumbnails(&reg, config);
417    if reg_result != 0 {
418        exit_code = reg_result;
419    }
420
421    exit_code
422}
423
424fn verify_single_case(
425    path: &str,
426    reg: &registry::EntityRegistry,
427    config: &VerifyConfig,
428) -> i32 {
429    let content = match std::fs::read_to_string(path) {
430        Ok(c) => c,
431        Err(e) => {
432            eprintln!("{path}: error reading file: {e}");
433            return 2;
434        }
435    };
436
437    let (case, entities, rels) = match parse_full(&content, Some(reg)) {
438        Ok(result) => result,
439        Err(errors) => {
440            for err in &errors {
441                eprintln!("{path}:{err}");
442            }
443            return 1;
444        }
445    };
446
447    let mut collect_errors = Vec::new();
448    let urls = verifier::collect_urls(&case.sources, &entities, &rels, &mut collect_errors);
449
450    if !collect_errors.is_empty() {
451        for err in &collect_errors {
452            eprintln!("{path}:{err}");
453        }
454        return 1;
455    }
456
457    if urls.is_empty() {
458        eprintln!("{path}: no URLs to verify");
459        return 0;
460    }
461
462    run_url_verification(path, &urls, config)
463}
464
465fn verify_registry_thumbnails(
466    reg: &registry::EntityRegistry,
467    config: &VerifyConfig,
468) -> i32 {
469    let urls = verifier::collect_registry_urls(reg);
470    if urls.is_empty() {
471        return 0;
472    }
473    run_url_verification("(registry)", &urls, config)
474}
475
476fn run_url_verification(
477    label: &str,
478    urls: &[verifier::UrlEntry],
479    config: &VerifyConfig,
480) -> i32 {
481    let mut verify_cache = load_verify_cache(label, config.cache_path.as_deref());
482
483    let (cached_results, urls_to_check) = partition_cached(urls, verify_cache.as_ref());
484    let check_count = urls_to_check.len();
485    let cached_count = cached_results.len();
486
487    if cached_count > 0 {
488        eprintln!(
489            "{label}: {cached_count} cached, {check_count} to check \
490             (concurrency={}, timeout={}s)",
491            config.concurrency, config.timeout
492        );
493    } else {
494        eprintln!(
495            "{label}: verifying {check_count} URLs \
496             (concurrency={}, timeout={}s)",
497            config.concurrency, config.timeout
498        );
499    }
500
501    let fresh_results = if urls_to_check.is_empty() {
502        Vec::new()
503    } else {
504        let rt = match tokio::runtime::Builder::new_current_thread()
505            .enable_all()
506            .build()
507        {
508            Ok(rt) => rt,
509            Err(e) => {
510                eprintln!("{label}: failed to create async runtime: {e}");
511                return 2;
512            }
513        };
514        rt.block_on(verifier::verify_urls(
515            urls_to_check,
516            config.concurrency,
517            config.timeout,
518        ))
519    };
520
521    if let Some(ref mut vc) = verify_cache {
522        for check in &fresh_results {
523            vc.put(&check.url, check.status, check.detail.as_deref());
524        }
525    }
526
527    let mut all_results = cached_results;
528    all_results.extend(fresh_results);
529
530    let has_error = print_verification_results(label, &all_results);
531
532    if let Some(ref vc) = verify_cache
533        && let Err(e) = vc.save()
534    {
535        eprintln!("{label}: cache save warning: {e}");
536    }
537
538    i32::from(has_error && !config.warn_only)
539}
540
541fn load_verify_cache(
542    label: &str,
543    cache_path: Option<&str>,
544) -> Option<crate::cache::VerifyCache> {
545    cache_path.map(|p| match crate::cache::VerifyCache::load(p) {
546        Ok(c) => {
547            eprintln!("{label}: using cache {p}");
548            c
549        }
550        Err(e) => {
551            eprintln!("{label}: cache load warning: {e}");
552            crate::cache::VerifyCache::load("/dev/null")
553                .unwrap_or_else(|_| crate::cache::VerifyCache::empty())
554        }
555    })
556}
557
558fn print_verification_results(label: &str, results: &[verifier::UrlCheck]) -> bool {
559    let mut has_error = false;
560
561    for check in results {
562        let detail = check.detail.as_deref().unwrap_or("");
563        match check.status {
564            verifier::CheckStatus::Ok => {
565                eprintln!(
566                    "  ok  {}{}",
567                    check.url,
568                    if check.is_thumbnail {
569                        " [thumbnail]"
570                    } else {
571                        ""
572                    }
573                );
574            }
575            verifier::CheckStatus::Warn => {
576                eprintln!("  warn  {} -- {detail}", check.url);
577            }
578            verifier::CheckStatus::Error => {
579                has_error = true;
580                eprintln!("  ERROR {} -- {detail}", check.url);
581            }
582        }
583    }
584
585    let ok_count = results
586        .iter()
587        .filter(|c| c.status == verifier::CheckStatus::Ok)
588        .count();
589    let warn_count = results
590        .iter()
591        .filter(|c| c.status == verifier::CheckStatus::Warn)
592        .count();
593    let err_count = results
594        .iter()
595        .filter(|c| c.status == verifier::CheckStatus::Error)
596        .count();
597
598    eprintln!("{label}: {ok_count} ok, {warn_count} warn, {err_count} error");
599
600    has_error
601}
602
603fn partition_cached(
604    urls: &[verifier::UrlEntry],
605    verify_cache: Option<&crate::cache::VerifyCache>,
606) -> (Vec<verifier::UrlCheck>, Vec<verifier::UrlEntry>) {
607    let Some(vc) = verify_cache else {
608        return (Vec::new(), urls.to_vec());
609    };
610
611    let mut cached = Vec::new();
612    let mut uncached = Vec::new();
613
614    for entry in urls {
615        if let Some(cache_entry) = vc.get(entry.url()) {
616            let status = match cache_entry.status.as_str() {
617                "ok" => verifier::CheckStatus::Ok,
618                "warn" => verifier::CheckStatus::Warn,
619                _ => verifier::CheckStatus::Error,
620            };
621            cached.push(verifier::UrlCheck {
622                url: entry.url().to_string(),
623                status,
624                detail: cache_entry.detail.clone(),
625                is_thumbnail: entry.is_thumbnail(),
626            });
627        } else {
628            uncached.push(entry.clone());
629        }
630    }
631
632    (cached, uncached)
633}
634
635// ── Check Staleness ─────────────────────────────────────────────────────────
636
/// Configuration for staleness checking.
///
/// Derives `Debug` and `Clone` so callers can log the thresholds and hand
/// out copies of them.
#[derive(Debug, Clone)]
pub struct StalenessConfig {
    /// Months before an `under_investigation` case is considered stale.
    pub investigation_months: u32,
    /// Months before a `trial` case is considered stale.
    pub trial_months: u32,
    /// Months before an `appeal` case is considered stale.
    pub appeal_months: u32,
}

impl Default for StalenessConfig {
    /// Defaults: 6 months for investigations, 12 for trials, 12 for appeals.
    fn default() -> Self {
        Self {
            investigation_months: 6,
            trial_months: 12,
            appeal_months: 12,
        }
    }
}
656
657/// Run staleness checks on content files.
658///
659/// Returns exit code (0 = success, 1 = errors found).
660pub fn check_staleness(path: Option<&str>, root: Option<&str>, config: &StalenessConfig) -> i32 {
661    let content_root = resolve_content_root(path, root);
662    let reg = match load_registry(&content_root) {
663        Ok(r) => r,
664        Err(code) => return code,
665    };
666
667    let case_files = match resolve_case_files(path, &content_root) {
668        Ok(f) => f,
669        Err(code) => return code,
670    };
671
672    if case_files.is_empty() {
673        eprintln!("no case files found");
674        return 1;
675    }
676
677    let thresholds = staleness::Thresholds {
678        investigation_months: config.investigation_months,
679        trial_months: config.trial_months,
680        appeal_months: config.appeal_months,
681    };
682
683    let now = chrono_today();
684
685    let mut all_findings: Vec<(String, staleness::Finding)> = Vec::new();
686
687    for case_path in &case_files {
688        let content = match std::fs::read_to_string(case_path) {
689            Ok(c) => c,
690            Err(e) => {
691                eprintln!("{case_path}: error reading file: {e}");
692                continue;
693            }
694        };
695
696        let (case, entities, _rels) = match parse_full(&content, Some(&reg)) {
697            Ok(result) => result,
698            Err(errors) => {
699                for err in &errors {
700                    eprintln!("{case_path}:{err}");
701                }
702                continue;
703            }
704        };
705
706        let findings = staleness::check_case(&case, &entities, &thresholds, now);
707        for finding in findings {
708            all_findings.push((case_path.clone(), finding));
709        }
710    }
711
712    all_findings.sort_by_key(|a| a.1.severity);
713
714    let mut errors = 0u32;
715    let mut warnings = 0u32;
716    let mut infos = 0u32;
717
718    for (path, finding) in &all_findings {
719        eprintln!("{}: {path}: {}", finding.severity, finding.message);
720        match finding.severity {
721            staleness::Severity::Error => errors += 1,
722            staleness::Severity::Warning => warnings += 1,
723            staleness::Severity::Info => infos += 1,
724        }
725    }
726
727    eprintln!(
728        "staleness: {errors} error(s), {warnings} warning(s), {infos} info(s) across {} case(s)",
729        case_files.len()
730    );
731
732    i32::from(errors > 0)
733}
734
735/// Get today's date as (year, month, day).
736fn chrono_today() -> (i32, u32, u32) {
737    let now = std::time::SystemTime::now();
738    let since_epoch = now
739        .duration_since(std::time::UNIX_EPOCH)
740        .unwrap_or_default();
741    let days = since_epoch.as_secs() / 86400;
742    days_to_date(days)
743}
744
/// Convert days since Unix epoch to (year, month, day).
///
/// Follows the `civil_from_days` algorithm from Howard Hinnant's date
/// library: the day count is shifted so that eras of 400 years (146 097 days)
/// start on 1 March, which places the leap day at the end of each year.
/// Day counts above `u32::MAX` are clamped to `u32::MAX`.
#[allow(
    clippy::cast_possible_truncation,
    clippy::cast_sign_loss,
    clippy::cast_possible_wrap
)]
fn days_to_date(days: u64) -> (i32, u32, u32) {
    const DAYS_PER_ERA: i64 = 146_097;
    // Days from 0000-03-01 to 1970-01-01.
    const EPOCH_SHIFT: i64 = 719_468;

    let clamped = u32::try_from(days).unwrap_or(u32::MAX);
    let shifted = i64::from(clamped) + EPOCH_SHIFT;

    // Floor division by the era length (the adjustment only matters for negatives).
    let era_numerator = if shifted >= 0 {
        shifted
    } else {
        shifted - (DAYS_PER_ERA - 1)
    };
    let era = era_numerator / DAYS_PER_ERA;
    let day_of_era = (shifted - era * DAYS_PER_ERA) as u32;

    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);

    // Month index counted from March (0 = March, ..., 11 = February).
    let march_based_month = (5 * day_of_year + 2) / 153;
    let day = day_of_year - (153 * march_based_month + 2) / 5 + 1;

    // January and February belong to the following calendar year.
    let (month, year_bump) = if march_based_month < 10 {
        (march_based_month + 3, 0)
    } else {
        (march_based_month - 9, 1)
    };
    let year = (i64::from(year_of_era) + era * 400) as i32 + year_bump;

    (year, month, day)
}
765
#[cfg(test)]
mod tests {
    use super::*;

    /// Day zero is the Unix epoch itself.
    #[test]
    fn days_to_date_epoch() {
        let epoch = days_to_date(0);
        assert_eq!(epoch, (1970, 1, 1));
    }

    /// Day 20089 is 2025-01-01.
    #[test]
    fn days_to_date_known() {
        let new_year_2025 = days_to_date(20089);
        assert_eq!(new_year_2025, (2025, 1, 1));
    }

    /// A content root without `redirects.yaml` yields no slugs.
    #[test]
    fn redirect_slugs_empty_on_missing_file() {
        let missing_root = Path::new("/nonexistent");
        assert!(load_redirect_slugs(missing_root).is_empty());
    }

    /// The default thresholds are 6 / 12 / 12 months.
    #[test]
    fn staleness_config_defaults() {
        let StalenessConfig {
            investigation_months,
            trial_months,
            appeal_months,
        } = StalenessConfig::default();
        assert_eq!(investigation_months, 6);
        assert_eq!(trial_months, 12);
        assert_eq!(appeal_months, 12);
    }
}