use std::collections::HashSet;
use clap::{Parser, Subcommand};
use weave_content::build_cache;
use weave_content::commands;
use weave_content::output;
use weave_content::registry;
use weave_content::{
build_case_output_tracked, load_registry, resolve_case_files, resolve_content_root,
};
#[cfg(test)]
use weave_content::entity;
// Top-level CLI argument container parsed by clap's derive API.
// NOTE: plain `//` comments are used deliberately — a `///` doc comment here
// would be picked up by the bare `about` in `#[command(...)]` and change the
// program's --help output.
#[derive(Parser)]
#[command(name = "weave-content", version, about)]
struct Cli {
    // The selected subcommand (validate / verify / build / check-staleness).
    #[command(subcommand)]
    command: Command,
}
// Subcommands of the weave-content CLI. Each variant's fields become that
// subcommand's positional argument (`path`) and flags.
// NOTE: `//` comments only — clap derive turns `///` doc comments on variants
// and fields into --help text, which would change observable CLI behavior.
#[derive(Subcommand)]
enum Command {
    // Validate case files; `strict` tightens checks, `quiet` suppresses output.
    Validate {
        path: Option<String>,
        #[arg(long)]
        root: Option<String>,
        #[arg(long)]
        strict: bool,
        #[arg(long, short)]
        quiet: bool,
    },
    // Verify case sources with bounded concurrency and a per-item timeout;
    // presumably checks source URLs — confirm in commands::verify.
    Verify {
        path: Option<String>,
        #[arg(long)]
        root: Option<String>,
        #[arg(long, default_value_t = 16)]
        concurrency: usize,
        #[arg(long, default_value_t = 15)]
        timeout: u64,
        #[arg(long)]
        cache: Option<String>,
        #[arg(long)]
        warn_only: bool,
    },
    // Build JSON output per case into `output`, optionally generating HTML
    // (requires `output`); `full` forces a non-incremental rebuild.
    Build {
        path: Option<String>,
        #[arg(long)]
        root: Option<String>,
        #[arg(short, long)]
        output: Option<String>,
        #[arg(long)]
        html: bool,
        #[arg(long, default_value = "https://redberrythread.org")]
        base_url: String,
        #[arg(long)]
        thumbnail_base_url: Option<String>,
        #[arg(long)]
        full: bool,
    },
    // Report stale cases; thresholds are in months per case phase
    // (names suggest investigation/trial/appeal — see commands::check_staleness).
    CheckStaleness {
        path: Option<String>,
        #[arg(long)]
        root: Option<String>,
        #[arg(long, default_value_t = 6)]
        investigation_months: u32,
        #[arg(long, default_value_t = 12)]
        trial_months: u32,
        #[arg(long, default_value_t = 12)]
        appeal_months: u32,
    },
}
fn main() {
let cli = Cli::parse();
let exit_code = match cli.command {
Command::Validate {
ref path,
ref root,
strict,
quiet,
} => commands::validate(path.as_deref(), root.as_deref(), strict, quiet),
Command::Verify {
ref path,
ref root,
concurrency,
timeout,
ref cache,
warn_only,
} => {
let config = commands::VerifyConfig {
concurrency,
timeout,
cache_path: cache.clone(),
warn_only,
};
commands::verify(path.as_deref(), root.as_deref(), &config)
}
Command::Build {
ref path,
ref root,
ref output,
html,
ref base_url,
ref thumbnail_base_url,
full,
} => cmd_build(
path.as_deref(),
root.as_deref(),
output.as_deref(),
html,
base_url,
thumbnail_base_url.as_deref(),
full,
),
Command::CheckStaleness {
ref path,
ref root,
investigation_months,
trial_months,
appeal_months,
} => {
let config = commands::StalenessConfig {
investigation_months,
trial_months,
appeal_months,
};
commands::check_staleness(path.as_deref(), root.as_deref(), &config)
}
};
std::process::exit(exit_code);
}
/// Handle the `build` subcommand: resolve the content root and case files,
/// build each case (incrementally via the build cache), and optionally
/// generate HTML output into `output_dir`.
///
/// Returns the process exit code (0 on success; the first failing step's
/// code otherwise).
fn cmd_build(
    path: Option<&str>,
    root: Option<&str>,
    output_dir: Option<&str>,
    generate_html: bool,
    base_url: &str,
    thumbnail_base_url: Option<&str>,
    force_full: bool,
) -> i32 {
    let content_root = resolve_content_root(path, root);
    let reg = match load_registry(&content_root) {
        Ok(r) => r,
        Err(code) => return code,
    };
    let case_files = match resolve_case_files(path, &content_root) {
        Ok(f) => f,
        Err(code) => return code,
    };
    if case_files.is_empty() {
        eprintln!("no case files found");
        return 1;
    }
    let mut build_cache = load_build_cache(&content_root, force_full);
    // Hash entity and case files up front so the incremental build can detect
    // both direct edits and dependency changes.
    // BUGFIX: `&reg` had been mangled into `®` (HTML-entity decoding of
    // "&reg"); restored to a plain reference.
    let file_hashes = hash_content_files(&reg, &case_files);
    let case_nulid_map = match weave_content::build_case_index(&case_files, &content_root) {
        Ok(m) => m,
        Err(code) => return code,
    };
    let (mut exit_code, all_outputs) = build_cases(
        &case_files,
        &reg,
        &case_nulid_map,
        &file_hashes,
        &mut build_cache,
        output_dir,
        generate_html,
        force_full,
    );
    finalize_build_cache(&mut build_cache, &case_files, &file_hashes);
    if generate_html {
        if let Some(dir) = output_dir {
            let html_result = weave_content::generate_html_output(
                dir,
                &all_outputs,
                base_url,
                thumbnail_base_url,
            );
            if html_result != 0 {
                exit_code = html_result;
            }
        } else {
            // HTML generation has nowhere to write without an output directory.
            eprintln!("--html requires --output directory");
            exit_code = 1;
        }
    }
    exit_code
}
/// Load the incremental build cache from `content_root`.
///
/// A forced full rebuild skips loading entirely; a load failure is reported
/// as a warning and falls back to an empty cache.
fn load_build_cache(content_root: &std::path::Path, force_full: bool) -> build_cache::BuildCache {
    if force_full {
        return build_cache::BuildCache::empty();
    }
    build_cache::BuildCache::load(content_root).unwrap_or_else(|e| {
        eprintln!("warning: {e}, starting fresh");
        build_cache::BuildCache::empty()
    })
}
fn hash_content_files(
reg: ®istry::EntityRegistry,
case_files: &[String],
) -> std::collections::HashMap<String, String> {
let mut file_hashes = std::collections::HashMap::new();
for entry in reg.entries() {
if let Some(path_str) = entry.path.to_str()
&& let Ok(hash) = build_cache::hash_file(&entry.path)
{
file_hashes.insert(path_str.to_string(), hash);
}
}
for case_path in case_files {
if let Ok(hash) = build_cache::hash_file(std::path::Path::new(case_path)) {
file_hashes.insert(case_path.clone(), hash);
}
}
file_hashes
}
#[allow(clippy::too_many_arguments)]
fn build_cases(
case_files: &[String],
reg: ®istry::EntityRegistry,
case_nulid_map: &std::collections::HashMap<String, (String, String)>,
file_hashes: &std::collections::HashMap<String, String>,
build_cache: &mut build_cache::BuildCache,
output_dir: Option<&str>,
collect_html: bool,
force_full: bool,
) -> (i32, Vec<output::CaseOutput>) {
let mut exit_code = 0;
let mut written_entities = HashSet::new();
let mut all_outputs = Vec::new();
let mut skipped = 0usize;
for case_path in case_files {
if !force_full
&& !collect_html
&& let Some(current_hash) = file_hashes.get(case_path)
&& build_cache.is_unchanged_with_hashes(case_path, current_hash, file_hashes)
{
skipped += 1;
continue;
}
match build_case_output_tracked(case_path, reg, &mut written_entities, case_nulid_map) {
Ok(case_output) => {
let write_result =
write_case_output(case_path, &case_output.case_id, &case_output, output_dir);
if write_result != 0 {
exit_code = write_result;
}
let deps: Vec<String> = case_output
.nodes
.iter()
.filter_map(|n| {
reg.get_by_name(&n.name)
.and_then(|e| e.path.to_str().map(String::from))
})
.collect();
if let Some(hash) = file_hashes.get(case_path) {
build_cache.put(case_path, hash.clone(), deps);
}
if collect_html {
all_outputs.push(case_output);
}
}
Err(code) => {
exit_code = code;
}
}
}
if skipped > 0 {
eprintln!("incremental: {skipped} case(s) unchanged, skipped");
}
(exit_code, all_outputs)
}
/// Record non-case (entity) file hashes in the cache, prune entries for
/// files that no longer exist, and persist the cache, warning on failure.
fn finalize_build_cache(
    build_cache: &mut build_cache::BuildCache,
    case_files: &[String],
    file_hashes: &std::collections::HashMap<String, String>,
) {
    // Anything hashed that is not a case file is an entity file: cache it
    // with an empty dependency list.
    for (path_str, hash) in file_hashes
        .iter()
        .filter(|(p, _)| !case_files.contains(*p))
    {
        build_cache.put(path_str, hash.clone(), vec![]);
    }
    let known: std::collections::HashSet<String> = file_hashes.keys().cloned().collect();
    build_cache.prune(&known);
    match build_cache.save() {
        Err(e) => eprintln!("warning: failed to save build cache: {e}"),
        Ok(_) => {
            if !build_cache.is_empty() {
                eprintln!("build cache: {} entries saved", build_cache.len());
            }
        }
    }
}
/// Serialize one case's output to pretty JSON and either write it to
/// `<output_dir>/<case_id>.json` (logging `path -> out_path`) or print it
/// to stdout. Returns 0 on success, 2 on serialization or write failure.
fn write_case_output(
    path: &str,
    case_id: &str,
    case_output: &output::CaseOutput,
    output_dir: Option<&str>,
) -> i32 {
    // Serialize once up front: both output modes report a serialization
    // failure with the same message, so the branches only differ in the sink.
    let json = match serde_json::to_string_pretty(case_output) {
        Ok(j) => j,
        Err(e) => {
            eprintln!("{path}: JSON serialization error: {e}");
            return 2;
        }
    };
    if let Some(dir) = output_dir {
        let out_path = format!("{dir}/{case_id}.json");
        if let Err(e) = std::fs::write(&out_path, json) {
            eprintln!("{out_path}: error writing file: {e}");
            return 2;
        }
        eprintln!("{path} -> {out_path}");
    } else {
        println!("{json}");
    }
    0
}
#[cfg(test)]
mod tests {
    use super::*;
    use weave_content::parse_full;

    // End-to-end fixture: frontmatter with id and two sources, three events,
    // explicit relationships (one carrying its own source line), and a
    // timeline section. Raw-string contents are significant — do not indent.
    const FULL_CASE: &str = r"---
id: 01JABC000000000000000000AA
sources:
- https://www.theguardian.com/football/2025/feb/03/bonnick
- https://novaramedia.com/2025/02/04/bonnick
---
# Bonnick v Arsenal FC
Kit manager dismissed over social media posts about Israel-Gaza.
## Events
### Bonnick dismissal
- occurred_at: 2024-12-24
- event_type: dismissal
- description: Arsenal dismisses Bonnick over social media posts
regarding Israel-Gaza conflict.
### FA investigation finding
- occurred_at: 2024
- event_type: investigation_closed
- description: FA investigates and finds the posts did not breach
FA rules. Matter closed by FA.
### Employment tribunal filing
- occurred_at: 2025-02-03
- event_type: custom:Employment Tribunal
- description: Bonnick files employment tribunal claim against Arsenal.
## Relationships
- Bonnick dismissal -> FA investigation finding: preceded_by
- FA investigation finding -> Employment tribunal filing: preceded_by
- Bonnick dismissal -> Employment tribunal filing: references
source: https://novaramedia.com/2025/02/04/bonnick
## Timeline
- Bonnick dismissal -> FA investigation finding
- FA investigation finding -> Employment tribunal filing
";

    // Parses the full fixture and checks metadata, events, and relationships.
    #[test]
    fn parse_full_case_file() {
        let (case, entities, rels) = parse_full(FULL_CASE, None).unwrap();
        assert_eq!(case.id.as_deref(), Some("01JABC000000000000000000AA"));
        assert_eq!(case.title, "Bonnick v Arsenal FC");
        assert!(case.summary.contains("Kit manager dismissed"));
        assert_eq!(case.sources.len(), 2);
        assert_eq!(entities.len(), 3);
        assert!(entities.iter().all(|e| e.label == entity::Label::Event));
        let dismissal = entities
            .iter()
            .find(|e| e.name == "Bonnick dismissal")
            .unwrap();
        assert_eq!(dismissal.label, entity::Label::Event);
        assert_eq!(rels.len(), 5);
        // NOTE(review): this filter assumes timeline-derived preceded_by rels
        // carry no source URLs while the ## Relationships ones do — confirm
        // against parse_full's behavior.
        let timeline_rels: Vec<_> = rels
            .iter()
            .filter(|r| r.rel_type == "preceded_by" && r.source_urls.is_empty())
            .collect();
        assert_eq!(timeline_rels.len(), 2);
        assert_eq!(timeline_rels[0].source_name, "Bonnick dismissal");
        assert_eq!(timeline_rels[0].target_name, "FA investigation finding");
        assert_eq!(timeline_rels[1].source_name, "FA investigation finding");
        assert_eq!(timeline_rels[1].target_name, "Employment tribunal filing");
    }

    // Minimal file: no id, one event, no relationships.
    #[test]
    fn parse_full_minimal_case() {
        let input = r"---
sources:
- https://example.com/source
---
# Minimal Test Case
A simple test.
## Events
### Something happened
- occurred_at: 2025-01-01
- event_type: conviction
";
        let (case, entities, rels) = parse_full(input, None).unwrap();
        assert!(case.id.is_none());
        assert_eq!(case.title, "Minimal Test Case");
        assert_eq!(entities.len(), 1);
        assert_eq!(entities[0].name, "Something happened");
        assert!(rels.is_empty());
    }

    // Serializes a built case and spot-checks the JSON, then verifies that
    // every relationship's source/target id refers to an emitted node.
    #[test]
    fn json_snapshot_full_case() {
        let (case, entities, rels) = parse_full(FULL_CASE, None).unwrap();
        let build_result = output::build_output(
            "bonnick-v-arsenal",
            "01TEST00000000000000000000",
            &case.title,
            &case.summary,
            &case.tags,
            None,
            case.case_type.as_deref(),
            case.status.as_deref(),
            case.amounts.as_deref(),
            case.tagline.as_deref(),
            &case.sources,
            &case.related_cases,
            &std::collections::HashMap::new(),
            &entities,
            &rels,
            &[],
            &case.involved,
        )
        .unwrap();
        let json = serde_json::to_string_pretty(&build_result.output).unwrap();
        assert!(json.contains("\"case_id\": \"bonnick-v-arsenal\""));
        assert!(json.contains("\"title\": \"Bonnick v Arsenal FC\""));
        assert!(json.contains("\"label\": \"event\""));
        assert!(json.contains("\"name\": \"Bonnick dismissal\""));
        assert!(json.contains("\"name\": \"FA investigation finding\""));
        assert!(json.contains("\"event_type\": \"dismissal\""));
        assert!(json.contains("\"event_type\": \"investigation_closed\""));
        assert!(json.contains("\"type\": \"preceded_by\""));
        assert!(json.contains("\"type\": \"references\""));
        let output: serde_json::Value = serde_json::from_str(&json).unwrap();
        let nodes = output["nodes"].as_array().unwrap();
        let rels_arr = output["relationships"].as_array().unwrap();
        for node in nodes {
            let id = node["id"].as_str().unwrap();
            assert!(!id.is_empty());
            assert!(id.len() >= 20);
        }
        for rel in rels_arr {
            let id = rel["id"].as_str().unwrap();
            assert!(!id.is_empty());
        }
        // Referential integrity: every edge endpoint must be a known node id.
        let node_ids: Vec<&str> = nodes.iter().map(|n| n["id"].as_str().unwrap()).collect();
        for rel in rels_arr {
            let source_id = rel["source_id"].as_str().unwrap();
            let target_id = rel["target_id"].as_str().unwrap();
            assert!(
                node_ids.contains(&source_id),
                "source_id {source_id} not found in nodes"
            );
            assert!(
                node_ids.contains(&target_id),
                "target_id {target_id} not found in nodes"
            );
        }
    }

    // Optional fields absent from the input must not appear in the JSON.
    #[test]
    fn json_snapshot_omits_empty_fields() {
        let input = r"---
sources:
- https://example.com/src
---
# Sparse Case
Summary.
## Events
### Something
- occurred_at: 2025-01-01
";
        let (case, entities, rels) = parse_full(input, None).unwrap();
        let build_result = output::build_output(
            "sparse",
            "01TEST00000000000000000000",
            &case.title,
            &case.summary,
            &case.tags,
            None,
            case.case_type.as_deref(),
            case.status.as_deref(),
            case.amounts.as_deref(),
            case.tagline.as_deref(),
            &case.sources,
            &case.related_cases,
            &std::collections::HashMap::new(),
            &entities,
            &rels,
            &[],
            &case.involved,
        )
        .unwrap();
        let json = serde_json::to_string_pretty(&build_result.output).unwrap();
        assert!(!json.contains("\"qualifier\""));
        assert!(!json.contains("\"thumbnail\""));
        assert!(!json.contains("\"aliases\""));
        assert!(!json.contains("\"urls\""));
        assert!(json.contains("\"occurred_at\": \"2025-01-01\""));
    }

    // A relationship naming an entity defined only in the registry must fail
    // without the registry and resolve with it.
    #[test]
    fn cross_file_resolution_with_registry() {
        use std::path::PathBuf;
        use weave_content::entity::Entity;
        let entries = vec![registry::RegistryEntry {
            entity: Entity {
                name: "Mark Bonnick".to_string(),
                label: entity::Label::Person,
                fields: vec![(
                    "nationality".to_string(),
                    entity::FieldValue::Single("British".to_string()),
                )],
                id: Some("01JXYZ123456789ABCDEFGHIJK".to_string()),
                line: 1,
                tags: Vec::new(),
                slug: None,
            },
            path: PathBuf::from("people/mark-bonnick.md"),
            tags: Vec::new(),
        }];
        let reg = registry::EntityRegistry::from_entries(entries).unwrap();
        let input = r"---
sources:
- https://example.com/src
---
# Cross Reference Test
Summary.
## Events
### Dismissal
- occurred_at: 2024-12-24
- event_type: dismissal
## Relationships
- Mark Bonnick -> Dismissal: associate_of
";
        // Without the registry, the unknown name is a parse error.
        let err = parse_full(input, None).unwrap_err();
        assert!(err.iter().any(|e| e.message.contains("Mark Bonnick")));
        // BUGFIX: `Some(&reg)` had been mangled into `Some(®)` (HTML-entity
        // decoding of "&reg"); restored.
        let (case, entities, rels) = parse_full(input, Some(&reg)).unwrap();
        assert!(case.id.is_none());
        assert_eq!(entities.len(), 1);
        assert_eq!(rels.len(), 1);
        assert_eq!(rels[0].source_name, "Mark Bonnick");
        assert_eq!(rels[0].target_name, "Dismissal");
    }
}