use clap::{Parser, Subcommand};
use weave_content::cache;
use weave_content::output;
use weave_content::registry;
use weave_content::verifier;
use weave_content::{
build_case_output, load_registry, parse_full, resolve_case_files, resolve_content_root,
};
#[cfg(test)]
use weave_content::entity;
// Top-level CLI definition: a single required subcommand.
// NOTE(review): `///` doc comments are deliberately avoided on clap items in
// this file -- clap derives help text from them, so adding them would change
// the `--help` output.
#[derive(Parser)]
#[command(name = "weave-content", version, about)]
struct Cli {
    // Which subcommand to dispatch to (see `Command`).
    #[command(subcommand)]
    command: Command,
}
// Subcommands understood by the weave-content binary.
// NOTE(review): regular `//` comments are used instead of `///` so clap's
// derived `--help` output stays unchanged.
#[derive(Subcommand)]
enum Command {
    // Parse case files and report structural problems (see cmd_validate).
    Validate {
        // Optional path to a case file or directory; also used to locate the
        // content root when --root is not given.
        path: Option<String>,
        // Explicit content root override.
        #[arg(long)]
        root: Option<String>,
    },
    // Check that every URL referenced by the cases (and the registry's
    // thumbnails) responds (see cmd_verify).
    Verify {
        path: Option<String>,
        #[arg(long)]
        root: Option<String>,
        // Maximum number of URL checks in flight at once.
        #[arg(long, default_value_t = 16)]
        concurrency: usize,
        // Per-request timeout in seconds (printed as "timeout={}s").
        #[arg(long, default_value_t = 15)]
        timeout: u64,
        // Optional path to a verification-result cache file.
        #[arg(long)]
        cache: Option<String>,
        // Report URL errors but still exit 0.
        #[arg(long)]
        warn_only: bool,
    },
    // Build cases into JSON output (see cmd_build).
    Build {
        path: Option<String>,
        #[arg(long)]
        root: Option<String>,
        // Directory for per-case JSON files; stdout when omitted.
        #[arg(short, long)]
        output: Option<String>,
    },
}
fn main() {
    // Parse the command line, dispatch to the matching handler, and exit
    // with the handler's status code.
    let cli = Cli::parse();
    // `cli.command` is consumed here, so the variants can be destructured by
    // value -- no `ref` bindings needed.
    let exit_code = match cli.command {
        Command::Validate { path, root } => cmd_validate(path.as_deref(), root.as_deref()),
        Command::Verify {
            path,
            root,
            concurrency,
            timeout,
            cache,
            warn_only,
        } => cmd_verify(
            path.as_deref(),
            root.as_deref(),
            concurrency,
            timeout,
            cache.as_deref(),
            warn_only,
        ),
        Command::Build { path, root, output } => {
            cmd_build(path.as_deref(), root.as_deref(), output.as_deref())
        }
    };
    std::process::exit(exit_code);
}
fn cmd_validate(path: Option<&str>, root: Option<&str>) -> i32 {
    // Validate every resolved case file against the entity registry, then
    // check for event names duplicated across files. The exit code is the
    // duplicate-check code when duplicates exist, otherwise the last
    // non-zero per-case code, otherwise 0.
    let content_root = resolve_content_root(path, root);
    let reg = match load_registry(&content_root) {
        Ok(r) => r,
        Err(code) => return code,
    };
    let case_files = match resolve_case_files(path, &content_root) {
        Ok(f) => f,
        Err(code) => return code,
    };
    if case_files.is_empty() {
        eprintln!("no case files found");
        return 1;
    }
    if !reg.is_empty() {
        eprintln!("registry: {} entities loaded", reg.len());
    }
    // (event name, file path) pairs gathered across all cases.
    let mut all_events: Vec<(String, String)> = Vec::new();
    // Keep the most recent failure code while still validating every file.
    let per_case_code = case_files.iter().fold(0, |code, case_path| {
        match validate_single_case(case_path, &reg, &mut all_events) {
            0 => code,
            failure => failure,
        }
    });
    check_duplicate_event_names(&all_events).unwrap_or(per_case_code)
}
fn validate_single_case(
path: &str,
reg: ®istry::EntityRegistry,
all_events: &mut Vec<(String, String)>,
) -> i32 {
let content = match std::fs::read_to_string(path) {
Ok(c) => c,
Err(e) => {
eprintln!("{path}: error reading file: {e}");
return 2;
}
};
match parse_full(&content, Some(reg)) {
Ok((case, entities, rels)) => {
eprintln!(
"{path}: ok -- {id}: {title} ({ent} entities, {rel} relationships, {src} sources)",
id = case.id,
title = case.title,
ent = entities.len(),
rel = rels.len(),
src = case.sources.len(),
);
if !case.summary.is_empty() {
eprintln!(
" summary: {}...",
&case.summary[..case.summary.len().min(80)]
);
}
for e in &entities {
let id_display = e.id.as_deref().unwrap_or("(no id)");
eprintln!(
" line {}: {id_display} {} ({}, {} fields)",
e.line,
e.name,
e.label,
e.fields.len()
);
}
for e in &entities {
if e.label == weave_content::entity::Label::PublicRecord {
all_events.push((e.name.clone(), path.to_string()));
}
}
for r in &rels {
let id_display = r.id.as_deref().unwrap_or("(no id)");
eprintln!(
" line {}: {id_display} {} -> {}: {}",
r.line, r.source_name, r.target_name, r.rel_type,
);
}
0
}
Err(errors) => {
for err in &errors {
eprintln!("{path}:{err}");
}
1
}
}
}
fn check_duplicate_event_names(all_events: &[(String, String)]) -> Option<i32> {
    // Report every event name that appears more than once across case files,
    // naming the file where it was first defined. Returns Some(1) when at
    // least one duplicate exists, None otherwise.
    use std::collections::hash_map::Entry;
    let mut first_seen = std::collections::HashMap::new();
    let mut duplicate_found = false;
    for (name, path) in all_events {
        match first_seen.entry(name.as_str()) {
            Entry::Occupied(slot) => {
                let first_path = *slot.get();
                eprintln!(
                    "error: duplicate event name {name:?} in {path} (first defined in {first_path})"
                );
                duplicate_found = true;
            }
            Entry::Vacant(slot) => {
                slot.insert(path.as_str());
            }
        }
    }
    duplicate_found.then_some(1)
}
#[allow(clippy::too_many_lines)]
fn cmd_verify(
    path: Option<&str>,
    root: Option<&str>,
    concurrency: usize,
    timeout: u64,
    cache_path: Option<&str>,
    warn_only: bool,
) -> i32 {
    // Run URL verification over every resolved case file, then over the
    // registry's thumbnail URLs.
    let content_root = resolve_content_root(path, root);
    let reg = match load_registry(&content_root) {
        Ok(r) => r,
        Err(code) => return code,
    };
    let case_files = match resolve_case_files(path, &content_root) {
        Ok(f) => f,
        Err(code) => return code,
    };
    if case_files.is_empty() {
        eprintln!("no case files found");
        return 1;
    }
    // Keep the most recent per-case failure while still verifying every file.
    let per_case_code = case_files.iter().fold(0, |code, case_path| {
        match verify_single_case(case_path, &reg, concurrency, timeout, cache_path, warn_only) {
            0 => code,
            failure => failure,
        }
    });
    // The registry pass always runs; when it fails, its code (computed last)
    // wins, matching the original accumulation order.
    match verify_registry_thumbnails(&reg, concurrency, timeout, cache_path, warn_only) {
        0 => per_case_code,
        failure => failure,
    }
}
#[allow(clippy::too_many_lines)]
fn verify_single_case(
    path: &str,
    reg: &registry::EntityRegistry,
    concurrency: usize,
    timeout: u64,
    cache_path: Option<&str>,
    warn_only: bool,
) -> i32 {
    // Verify every URL referenced by one case file (sources, entities and
    // relationships). Returns 2 on read/runtime-setup failure, 1 on parse or
    // URL-collection errors; otherwise 1 only when some URL check errored
    // and `warn_only` is false, else 0.
    let content = match std::fs::read_to_string(path) {
        Ok(c) => c,
        Err(e) => {
            eprintln!("{path}: error reading file: {e}");
            return 2;
        }
    };
    let (case, entities, rels) = match parse_full(&content, Some(reg)) {
        Ok(result) => result,
        Err(errors) => {
            for err in &errors {
                eprintln!("{path}:{err}");
            }
            return 1;
        }
    };
    // Collect every URL to check; collection problems abort the case.
    let mut collect_errors = Vec::new();
    let urls = verifier::collect_urls(&case.sources, &entities, &rels, &mut collect_errors);
    if !collect_errors.is_empty() {
        for err in &collect_errors {
            eprintln!("{path}:{err}");
        }
        return 1;
    }
    if urls.is_empty() {
        eprintln!("{path}: no URLs to verify");
        return 0;
    }
    // Optional persisted result cache. On a load failure we fall back to
    // loading "/dev/null" and, failing that, to an in-memory empty cache.
    // NOTE(review): presumably load() records the path later used by save(),
    // so the /dev/null fallback also quietly discards saves -- confirm in
    // the cache module (and note /dev/null is not portable to Windows).
    let mut verify_cache = cache_path.map(|p| match cache::VerifyCache::load(p) {
        Ok(c) => {
            eprintln!("{path}: using cache {p}");
            c
        }
        Err(e) => {
            eprintln!("{path}: cache load warning: {e}");
            cache::VerifyCache::load("/dev/null").unwrap_or_else(|_| {
                cache::VerifyCache::empty()
            })
        }
    });
    // Split URLs into cache hits and those needing a fresh network check.
    let (cached_results, urls_to_check) = partition_cached(&urls, verify_cache.as_ref());
    let check_count = urls_to_check.len();
    let cached_count = cached_results.len();
    if cached_count > 0 {
        eprintln!(
            "{path}: {cached_count} cached, {check_count} to check (concurrency={concurrency}, timeout={timeout}s)"
        );
    } else {
        eprintln!(
            "{path}: verifying {check_count} URLs (concurrency={concurrency}, timeout={timeout}s)"
        );
    }
    // Fresh checks run on a current-thread Tokio runtime built on demand;
    // concurrency is handled inside verify_urls.
    let fresh_results = if urls_to_check.is_empty() {
        Vec::new()
    } else {
        let rt = match tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
        {
            Ok(rt) => rt,
            Err(e) => {
                eprintln!("{path}: failed to create async runtime: {e}");
                return 2;
            }
        };
        rt.block_on(verifier::verify_urls(urls_to_check, concurrency, timeout))
    };
    // Record the fresh outcomes in the cache before reporting.
    if let Some(ref mut vc) = verify_cache {
        for check in &fresh_results {
            vc.put(&check.url, check.status, check.detail.as_deref());
        }
    }
    let mut all_results = cached_results;
    all_results.extend(fresh_results);
    // Per-URL report; cached and fresh results print identically.
    let mut has_error = false;
    for check in &all_results {
        let detail = check.detail.as_deref().unwrap_or("");
        match check.status {
            verifier::CheckStatus::Ok => {
                eprintln!(
                    " ok {}{}",
                    check.url,
                    if check.is_thumbnail {
                        " [thumbnail]"
                    } else {
                        ""
                    }
                );
            }
            verifier::CheckStatus::Warn => {
                eprintln!(" warn {} -- {detail}", check.url);
            }
            verifier::CheckStatus::Error => {
                has_error = true;
                eprintln!(" ERROR {} -- {detail}", check.url);
            }
        }
    }
    // Summary counts per status.
    let ok_count = all_results
        .iter()
        .filter(|c| c.status == verifier::CheckStatus::Ok)
        .count();
    let warn_count = all_results
        .iter()
        .filter(|c| c.status == verifier::CheckStatus::Warn)
        .count();
    let err_count = all_results
        .iter()
        .filter(|c| c.status == verifier::CheckStatus::Error)
        .count();
    eprintln!("{path}: {ok_count} ok, {warn_count} warn, {err_count} error");
    // Persist the cache; a save failure only warns, never fails the case.
    if let Some(ref vc) = verify_cache
        && let Err(e) = vc.save()
    {
        eprintln!("{path}: cache save warning: {e}");
    }
    i32::from(has_error && !warn_only)
}
fn verify_registry_thumbnails(
    reg: &registry::EntityRegistry,
    concurrency: usize,
    timeout: u64,
    cache_path: Option<&str>,
    warn_only: bool,
) -> i32 {
    // Verify the registry's thumbnail URLs, labelled "(registry)" in all
    // output. Mirrors the per-case verification flow. Returns 0 when there
    // is nothing to check or all checks pass; 2 on runtime-setup failure;
    // 1 when a check errored and `warn_only` is false.
    let urls = verifier::collect_registry_urls(reg);
    if urls.is_empty() {
        return 0;
    }
    let label = "(registry)";
    // Optional persisted result cache; on load failure fall back to loading
    // "/dev/null", then to an in-memory empty cache.
    // NOTE(review): presumably load() records the path later used by save(),
    // so the /dev/null fallback also quietly discards saves -- confirm.
    let mut verify_cache = cache_path.map(|p| match cache::VerifyCache::load(p) {
        Ok(c) => c,
        Err(e) => {
            eprintln!("{label}: cache load warning: {e}");
            cache::VerifyCache::load("/dev/null").unwrap_or_else(|_| cache::VerifyCache::empty())
        }
    });
    // Split into cache hits and URLs needing a fresh check.
    let (cached_results, urls_to_check) = partition_cached(&urls, verify_cache.as_ref());
    let check_count = urls_to_check.len();
    let cached_count = cached_results.len();
    if cached_count > 0 {
        eprintln!(
            "{label}: {cached_count} cached, {check_count} to check (concurrency={concurrency}, timeout={timeout}s)"
        );
    } else {
        eprintln!(
            "{label}: verifying {check_count} thumbnail URLs (concurrency={concurrency}, timeout={timeout}s)"
        );
    }
    // Fresh checks run on a current-thread Tokio runtime built on demand.
    let fresh_results = if urls_to_check.is_empty() {
        Vec::new()
    } else {
        let rt = match tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
        {
            Ok(rt) => rt,
            Err(e) => {
                eprintln!("{label}: failed to create async runtime: {e}");
                return 2;
            }
        };
        rt.block_on(verifier::verify_urls(urls_to_check, concurrency, timeout))
    };
    // Record fresh outcomes in the cache before reporting.
    if let Some(ref mut vc) = verify_cache {
        for check in &fresh_results {
            vc.put(&check.url, check.status, check.detail.as_deref());
        }
    }
    let mut all_results = cached_results;
    all_results.extend(fresh_results);
    // Per-URL report; every registry URL is a thumbnail.
    let mut has_error = false;
    for check in &all_results {
        let detail = check.detail.as_deref().unwrap_or("");
        match check.status {
            verifier::CheckStatus::Ok => {
                eprintln!(" ok {} [thumbnail]", check.url);
            }
            verifier::CheckStatus::Warn => {
                eprintln!(" warn {} -- {detail}", check.url);
            }
            verifier::CheckStatus::Error => {
                has_error = true;
                eprintln!(" ERROR {} -- {detail}", check.url);
            }
        }
    }
    // Summary counts per status.
    let ok_count = all_results
        .iter()
        .filter(|c| c.status == verifier::CheckStatus::Ok)
        .count();
    let warn_count = all_results
        .iter()
        .filter(|c| c.status == verifier::CheckStatus::Warn)
        .count();
    let err_count = all_results
        .iter()
        .filter(|c| c.status == verifier::CheckStatus::Error)
        .count();
    eprintln!("{label}: {ok_count} ok, {warn_count} warn, {err_count} error");
    // Persist the cache; a save failure only warns.
    if let Some(ref vc) = verify_cache
        && let Err(e) = vc.save()
    {
        eprintln!("{label}: cache save warning: {e}");
    }
    i32::from(has_error && !warn_only)
}
fn partition_cached(
urls: &[verifier::UrlEntry],
verify_cache: Option<&cache::VerifyCache>,
) -> (Vec<verifier::UrlCheck>, Vec<verifier::UrlEntry>) {
let Some(vc) = verify_cache else {
return (Vec::new(), urls.to_vec());
};
let mut cached = Vec::new();
let mut uncached = Vec::new();
for entry in urls {
if let Some(cache_entry) = vc.get(entry.url()) {
let status = match cache_entry.status.as_str() {
"ok" => verifier::CheckStatus::Ok,
"warn" => verifier::CheckStatus::Warn,
_ => verifier::CheckStatus::Error,
};
cached.push(verifier::UrlCheck {
url: entry.url().to_string(),
status,
detail: cache_entry.detail.clone(),
is_thumbnail: entry.is_thumbnail(),
});
} else {
uncached.push(entry.clone());
}
}
(cached, uncached)
}
fn cmd_build(path: Option<&str>, root: Option<&str>, output_dir: Option<&str>) -> i32 {
    // Build every resolved case file into JSON output. Returns the last
    // non-zero per-case code, or 0 when every case built cleanly.
    let content_root = resolve_content_root(path, root);
    let reg = match load_registry(&content_root) {
        Ok(r) => r,
        Err(code) => return code,
    };
    let case_files = match resolve_case_files(path, &content_root) {
        Ok(f) => f,
        Err(code) => return code,
    };
    if case_files.is_empty() {
        eprintln!("no case files found");
        return 1;
    }
    // Keep building after a failure; remember the most recent failure code.
    case_files.iter().fold(0, |code, case_path| {
        match build_single_case(case_path, &reg, output_dir) {
            0 => code,
            failure => failure,
        }
    })
}
fn build_single_case(path: &str, reg: ®istry::EntityRegistry, output_dir: Option<&str>) -> i32 {
let case_output = match build_case_output(path, reg) {
Ok(output) => output,
Err(code) => return code,
};
write_case_output(path, &case_output.case_id, &case_output, output_dir)
}
fn write_case_output(
    path: &str,
    case_id: &str,
    case_output: &output::CaseOutput,
    output_dir: Option<&str>,
) -> i32 {
    // Serialize the built case and either write it as `<case_id>.json` under
    // `output_dir`, or print it to stdout when no directory was given.
    // Returns 0 on success, 2 on serialization or write failure.
    //
    // Serialization is hoisted out of the two branches (it was duplicated in
    // both) so the error path is handled exactly once.
    let json = match serde_json::to_string_pretty(case_output) {
        Ok(json) => json,
        Err(e) => {
            eprintln!("{path}: JSON serialization error: {e}");
            return 2;
        }
    };
    match output_dir {
        Some(dir) => {
            // Path::join instead of `format!("{dir}/{case_id}.json")` so the
            // separator is correct on every platform.
            let out_path = std::path::Path::new(dir).join(format!("{case_id}.json"));
            if let Err(e) = std::fs::write(&out_path, json) {
                eprintln!("{}: error writing file: {e}", out_path.display());
                return 2;
            }
            eprintln!("{path} -> {}", out_path.display());
        }
        None => println!("{json}"),
    }
    0
}
#[cfg(test)]
mod tests {
    use super::*;
    // Representative full case file: front matter (id + two sources), title,
    // summary, three events, explicit relationships (one carrying its own
    // source line), and a timeline section.
    const FULL_CASE: &str = r"---
id: bonnick-v-arsenal
sources:
- https://www.theguardian.com/football/2025/feb/03/bonnick
- https://novaramedia.com/2025/02/04/bonnick
---
# Bonnick v Arsenal FC
Kit manager dismissed over social media posts about Israel-Gaza.
## Events
### Bonnick dismissal
- occurred_at: 2024-12-24
- document_type: termination
- description: Arsenal dismisses Bonnick over social media posts
regarding Israel-Gaza conflict.
### FA investigation finding
- occurred_at: 2024
- document_type: investigation
- description: FA investigates and finds the posts did not breach
FA rules. Matter closed by FA.
### Employment tribunal filing
- occurred_at: 2025-02-03
- document_type: filing
- description: Bonnick files employment tribunal claim against Arsenal.
## Relationships
- Bonnick dismissal -> FA investigation finding: related_to
- FA investigation finding -> Employment tribunal filing: related_to
- Bonnick dismissal -> Employment tribunal filing: related_to
- source: https://novaramedia.com/2025/02/04/bonnick
## Timeline
Bonnick dismissal -> FA investigation finding -> Employment tribunal filing
";
    // End-to-end parse of the full fixture: front matter, title, summary,
    // entities (all PublicRecord), and relationships. The assertions show
    // the Timeline section contributes the two "next" relationships on top
    // of the three explicit "related_to" ones.
    #[test]
    fn parse_full_case_file() {
        let (case, entities, rels) = parse_full(FULL_CASE, None).unwrap();
        assert_eq!(case.id, "bonnick-v-arsenal");
        assert_eq!(case.title, "Bonnick v Arsenal FC");
        assert!(case.summary.contains("Kit manager dismissed"));
        assert_eq!(case.sources.len(), 2);
        assert_eq!(entities.len(), 3);
        assert!(
            entities
                .iter()
                .all(|e| e.label == entity::Label::PublicRecord)
        );
        let dismissal = entities
            .iter()
            .find(|e| e.name == "Bonnick dismissal")
            .unwrap();
        assert_eq!(dismissal.label, entity::Label::PublicRecord);
        // 3 explicit related_to + 2 timeline-derived next.
        assert_eq!(rels.len(), 5);
        let next_rels: Vec<_> = rels.iter().filter(|r| r.rel_type == "next").collect();
        assert_eq!(next_rels.len(), 2);
        assert_eq!(next_rels[0].source_name, "Bonnick dismissal");
        assert_eq!(next_rels[0].target_name, "FA investigation finding");
        assert_eq!(next_rels[1].source_name, "FA investigation finding");
        assert_eq!(next_rels[1].target_name, "Employment tribunal filing");
    }
    // Smallest accepted case file: one event, no Relationships or Timeline
    // sections, yielding a single entity and no relationships.
    #[test]
    fn parse_full_minimal_case() {
        let input = r"---
id: minimal-test
sources:
- https://example.com/source
---
# Minimal Test Case
A simple test.
## Events
### Something happened
- occurred_at: 2025-01-01
- document_type: court_ruling
";
        let (case, entities, rels) = parse_full(input, None).unwrap();
        assert_eq!(case.id, "minimal-test");
        assert_eq!(case.title, "Minimal Test Case");
        assert_eq!(entities.len(), 1);
        assert_eq!(entities[0].name, "Something happened");
        assert!(rels.is_empty());
    }
    // Snapshot-style check of the serialized build output: expected field
    // names/values appear in the JSON, every node gets a non-empty id of at
    // least 20 chars, and every relationship's endpoints resolve to nodes.
    #[test]
    fn json_snapshot_full_case() {
        let (case, entities, rels) = parse_full(FULL_CASE, None).unwrap();
        let build_result = output::build_output(
            &case.id,
            &case.title,
            &case.summary,
            &case.sources,
            &entities,
            &rels,
            &[],
        )
        .unwrap();
        let json = serde_json::to_string_pretty(&build_result.output).unwrap();
        assert!(json.contains("\"case_id\": \"bonnick-v-arsenal\""));
        assert!(json.contains("\"title\": \"Bonnick v Arsenal FC\""));
        assert!(json.contains("\"label\": \"public_record\""));
        assert!(json.contains("\"name\": \"Bonnick dismissal\""));
        assert!(json.contains("\"name\": \"FA investigation finding\""));
        assert!(json.contains("\"document_type\": \"termination\""));
        assert!(json.contains("\"document_type\": \"investigation\""));
        assert!(json.contains("\"type\": \"related_to\""));
        assert!(json.contains("\"type\": \"next\""));
        // Re-parse the JSON to check structural invariants.
        let output: serde_json::Value = serde_json::from_str(&json).unwrap();
        let nodes = output["nodes"].as_array().unwrap();
        let rels_arr = output["relationships"].as_array().unwrap();
        for node in nodes {
            let id = node["id"].as_str().unwrap();
            assert!(!id.is_empty());
            // NOTE(review): 20 looks like a lower bound on the generated id
            // format's length (the registry test uses a 26-char ULID-like id)
            // -- confirm against the id generator.
            assert!(id.len() >= 20);
        }
        for rel in rels_arr {
            let id = rel["id"].as_str().unwrap();
            assert!(!id.is_empty());
        }
        // Referential integrity: every relationship endpoint is a real node.
        let node_ids: Vec<&str> = nodes.iter().map(|n| n["id"].as_str().unwrap()).collect();
        for rel in rels_arr {
            let source_id = rel["source_id"].as_str().unwrap();
            let target_id = rel["target_id"].as_str().unwrap();
            assert!(
                node_ids.contains(&source_id),
                "source_id {source_id} not found in nodes"
            );
            assert!(
                node_ids.contains(&target_id),
                "target_id {target_id} not found in nodes"
            );
        }
    }
    // Fields that are absent in the source must be omitted from the JSON
    // entirely, not serialized as null/empty.
    #[test]
    fn json_snapshot_omits_empty_fields() {
        let input = r"---
id: sparse
sources:
- https://example.com/src
---
# Sparse Case
Summary.
## Events
### Something
- occurred_at: 2025-01-01
";
        let (case, entities, rels) = parse_full(input, None).unwrap();
        let build_result = output::build_output(
            &case.id,
            &case.title,
            &case.summary,
            &case.sources,
            &entities,
            &rels,
            &[],
        )
        .unwrap();
        let json = serde_json::to_string_pretty(&build_result.output).unwrap();
        assert!(!json.contains("\"qualifier\""));
        assert!(!json.contains("\"description\""));
        assert!(!json.contains("\"thumbnail\""));
        assert!(!json.contains("\"aliases\""));
        assert!(!json.contains("\"urls\""));
        assert!(json.contains("\"occurred_at\": \"2025-01-01\""));
    }
    // A relationship naming an entity that only exists in the registry must
    // fail without the registry and resolve with it.
    #[test]
    fn cross_file_resolution_with_registry() {
        use std::path::PathBuf;
        use weave_content::entity::Entity;
        let entries = vec![registry::RegistryEntry {
            entity: Entity {
                name: "Mark Bonnick".to_string(),
                label: entity::Label::Actor,
                fields: vec![(
                    "nationality".to_string(),
                    entity::FieldValue::Single("British".to_string()),
                )],
                id: Some("01JXYZ123456789ABCDEFGHIJK".to_string()),
                line: 1,
            },
            path: PathBuf::from("actors/mark-bonnick.md"),
        }];
        let reg = registry::EntityRegistry::from_entries(entries).unwrap();
        let input = r"---
id: test-cross-ref
sources:
- https://example.com/src
---
# Cross Reference Test
Summary.
## Events
### Dismissal
- occurred_at: 2024-12-24
- document_type: termination
## Relationships
- Mark Bonnick -> Dismissal: related_to
";
        // Without the registry, "Mark Bonnick" is an unknown name.
        let err = parse_full(input, None).unwrap_err();
        assert!(err.iter().any(|e| e.message.contains("Mark Bonnick")));
        // With the registry, the relationship resolves; the registry entity
        // is not added to the case's own entity list.
        let (case, entities, rels) = parse_full(input, Some(&reg)).unwrap();
        assert_eq!(case.id, "test-cross-ref");
        assert_eq!(entities.len(), 1);
        assert_eq!(rels.len(), 1);
        assert_eq!(rels[0].source_name, "Mark Bonnick");
        assert_eq!(rels[0].target_name, "Dismissal");
    }
}