use clap::{Parser, Subcommand};
use rayon::prelude::*;
use weave_content::build_cache;
use weave_content::cache;
use weave_content::output;
use weave_content::registry;
use weave_content::tags;
use weave_content::verifier;
use weave_content::{
build_case_output_tracked, load_registry, load_tag_registry, parse_full, resolve_case_files,
resolve_content_root,
};
#[cfg(test)]
use weave_content::entity;
// Command-line interface for weave-content, parsed via clap's derive API.
// NOTE(review): plain `//` comments are used (not `///`) because clap turns
// doc comments into help text; this documentation must not alter the help.
#[derive(Parser)]
#[command(name = "weave-content", version, about)]
struct Cli {
// Which subcommand to run (Validate, Verify, or Build).
#[command(subcommand)]
command: Command,
}
// Subcommands of the weave-content CLI.
// NOTE(review): `//` comments are used (not `///`) so clap's generated
// help output is left unchanged.
#[derive(Subcommand)]
enum Command {
// Parse case files and validate tags; prints diagnostics to stderr.
Validate {
// Optional path to a case file or directory; defaults per resolve_content_root.
path: Option<String>,
#[arg(long)]
// Optional explicit content root.
root: Option<String>,
},
// Check every URL referenced by cases and registry thumbnails.
Verify {
path: Option<String>,
#[arg(long)]
root: Option<String>,
// Maximum number of URL checks in flight at once.
#[arg(long, default_value_t = 16)]
concurrency: usize,
// Per-request timeout in seconds.
#[arg(long, default_value_t = 15)]
timeout: u64,
// Optional path to a verify-cache file to read/update.
#[arg(long)]
cache: Option<String>,
// Report URL errors without failing the process.
#[arg(long)]
warn_only: bool,
},
// Build JSON (and optionally HTML) outputs from case files.
Build {
path: Option<String>,
#[arg(long)]
root: Option<String>,
// Output directory; JSON goes to stdout when omitted.
#[arg(short, long)]
output: Option<String>,
// Also generate HTML pages and a sitemap (requires --output).
#[arg(long)]
html: bool,
// Base URL used for absolute links in the sitemap.
#[arg(long, default_value = "https://redberrythread.org")]
base_url: String,
// Ignore the incremental build cache and rebuild everything.
#[arg(long)]
full: bool,
},
}
/// Entry point: parse CLI arguments, dispatch to the matching subcommand
/// handler, and exit with the handler's numeric status code.
fn main() {
    let cli = Cli::parse();
    let status = match cli.command {
        Command::Validate { path, root } => cmd_validate(path.as_deref(), root.as_deref()),
        Command::Verify {
            path,
            root,
            concurrency,
            timeout,
            cache,
            warn_only,
        } => cmd_verify(
            path.as_deref(),
            root.as_deref(),
            concurrency,
            timeout,
            cache.as_deref(),
            warn_only,
        ),
        Command::Build {
            path,
            root,
            output,
            html,
            base_url,
            full,
        } => cmd_build(
            path.as_deref(),
            root.as_deref(),
            output.as_deref(),
            html,
            &base_url,
            full,
        ),
    };
    std::process::exit(status);
}
/// `validate` subcommand: loads the entity and tag registries, parses every
/// case file (in parallel), validates tags on both registry entities and
/// cases, then checks for duplicate event names and inconsistent qualifier
/// casing.
///
/// Returns a process exit code: 0 on success, 1 on validation errors,
/// 2 on read failures (propagated from individual cases).
///
/// Fix: restores `&reg` at the two call sites where the source had been
/// corrupted to `®` (HTML-entity mangling of `&reg`), which prevented
/// compilation.
fn cmd_validate(path: Option<&str>, root: Option<&str>) -> i32 {
    let content_root = resolve_content_root(path, root);
    let reg = match load_registry(&content_root) {
        Ok(r) => r,
        Err(code) => return code,
    };
    let tag_reg = match load_tag_registry(&content_root) {
        Ok(r) => r,
        Err(code) => return code,
    };
    let case_files = match resolve_case_files(path, &content_root) {
        Ok(f) => f,
        Err(code) => return code,
    };
    if case_files.is_empty() {
        eprintln!("no case files found");
        return 1;
    }
    if !reg.is_empty() {
        eprintln!("registry: {} entities loaded", reg.len());
    }
    if !tag_reg.is_empty() {
        eprintln!(
            "tags: {} tags loaded across {} categories",
            tag_reg.len(),
            tag_reg.category_slugs().len()
        );
    }
    // Validate the tags attached to registry entities (depth limit 2).
    let mut entity_tag_errors = false;
    for entry in reg.entries() {
        let tag_errors = tag_reg.validate_tags(&entry.tags, 2);
        for err in &tag_errors {
            eprintln!("{}:{err}", entry.path.display());
        }
        if !tag_errors.is_empty() {
            entity_tag_errors = true;
        }
    }
    // Validate case files in parallel; each result carries its own exit
    // code plus (event name, path) pairs used for duplicate detection.
    let results: Vec<ValidateResult> = case_files
        .par_iter()
        .map(|case_path| validate_single_case(case_path, &reg, &tag_reg))
        .collect();
    let mut exit_code = i32::from(entity_tag_errors);
    let mut all_events: Vec<(String, String)> = Vec::new();
    for result in results {
        if result.exit_code != 0 {
            exit_code = result.exit_code;
        }
        all_events.extend(result.events);
    }
    if let Some(code) = check_duplicate_event_names(&all_events) {
        exit_code = code;
    }
    // Qualifier-casing issues are warnings only; they never affect the code.
    check_qualifier_consistency(&reg);
    exit_code
}
// Outcome of validating one case file.
struct ValidateResult {
// 0 = ok, 1 = validation errors, 2 = read failure.
exit_code: i32,
// (event name, case file path) pairs, collected for duplicate detection.
events: Vec<(String, String)>,
}
fn validate_single_case(
path: &str,
reg: ®istry::EntityRegistry,
tag_reg: &tags::TagRegistry,
) -> ValidateResult {
let content = match std::fs::read_to_string(path) {
Ok(c) => c,
Err(e) => {
eprintln!("{path}: error reading file: {e}");
return ValidateResult {
exit_code: 2,
events: Vec::new(),
};
}
};
match parse_full(&content, Some(reg)) {
Ok((case, entities, rels)) => {
eprintln!(
"{path}: ok -- {id}: {title} ({ent} entities, {rel} relationships, {src} sources)",
id = case.id,
title = case.title,
ent = entities.len(),
rel = rels.len(),
src = case.sources.len(),
);
if !case.summary.is_empty() {
eprintln!(
" summary: {}...",
&case.summary[..case.summary.len().min(80)]
);
}
for e in &entities {
let id_display = e.id.as_deref().unwrap_or("(no id)");
eprintln!(
" line {}: {id_display} {} ({}, {} fields)",
e.line,
e.name,
e.label,
e.fields.len()
);
}
let events: Vec<(String, String)> = entities
.iter()
.filter(|e| e.label == weave_content::entity::Label::Event)
.map(|e| (e.name.clone(), path.to_string()))
.collect();
for r in &rels {
let id_display = r.id.as_deref().unwrap_or("(no id)");
eprintln!(
" line {}: {id_display} {} -> {}: {}",
r.line, r.source_name, r.target_name, r.rel_type,
);
}
let mut exit_code = 0;
let tag_errors = tag_reg.validate_tags(&case.tags, 2);
for err in &tag_errors {
eprintln!("{path}:{err}");
}
if !tag_errors.is_empty() {
exit_code = 1;
}
ValidateResult { exit_code, events }
}
Err(errors) => {
for err in &errors {
eprintln!("{path}:{err}");
}
ValidateResult {
exit_code: 1,
events: Vec::new(),
}
}
}
}
/// Scans `(event name, case path)` pairs for names that appear more than
/// once across all cases. Prints one error line per duplicate occurrence
/// (citing the file that first defined the name) and returns `Some(1)` if
/// any duplicate was found, `None` otherwise.
fn check_duplicate_event_names(all_events: &[(String, String)]) -> Option<i32> {
    use std::collections::hash_map::Entry;
    let mut first_seen: std::collections::HashMap<&str, &str> =
        std::collections::HashMap::new();
    let mut duplicate_found = false;
    for (name, path) in all_events {
        match first_seen.entry(name.as_str()) {
            Entry::Occupied(slot) => {
                // Keep the first definition; report this repeat against it.
                let first_path = slot.get();
                eprintln!(
                    "error: duplicate event name {name:?} in {path} (first defined in {first_path})"
                );
                duplicate_found = true;
            }
            Entry::Vacant(slot) => {
                slot.insert(path);
            }
        }
    }
    duplicate_found.then_some(1)
}
fn check_qualifier_consistency(reg: ®istry::EntityRegistry) {
use weave_content::entity::FieldValue;
let mut by_lower: std::collections::HashMap<String, Vec<(String, String)>> =
std::collections::HashMap::new();
for entry in reg.entries() {
let qualifier = entry
.entity
.fields
.iter()
.find(|(k, _)| k == "qualifier")
.and_then(|(_, v)| match v {
FieldValue::Single(s) => Some(s.as_str()),
FieldValue::List(_) => None,
});
if let Some(q) = qualifier {
by_lower
.entry(q.to_lowercase())
.or_default()
.push((q.to_string(), entry.path.display().to_string()));
}
}
for occurrences in by_lower.values() {
let first = &occurrences[0].0;
let inconsistent: Vec<_> = occurrences.iter().filter(|(q, _)| q != first).collect();
if !inconsistent.is_empty() {
eprintln!(
"warning: inconsistent qualifier casing for {:?}:",
occurrences[0].0
);
for (q, path) in occurrences {
eprintln!(" {path}: {q:?}");
}
}
}
}
/// `verify` subcommand: checks every URL referenced by the case files, then
/// the registry thumbnail URLs, optionally consulting/updating a verify
/// cache.
///
/// Returns the worst exit code observed (0 ok, 1 URL errors found,
/// 2 read/runtime failure). With `warn_only`, URL errors are reported but
/// do not fail the run.
///
/// Fix: restores `&reg` at the two call sites where the source had been
/// corrupted to `®` (HTML-entity mangling), which prevented compilation.
#[allow(clippy::too_many_lines)]
fn cmd_verify(
    path: Option<&str>,
    root: Option<&str>,
    concurrency: usize,
    timeout: u64,
    cache_path: Option<&str>,
    warn_only: bool,
) -> i32 {
    let content_root = resolve_content_root(path, root);
    let reg = match load_registry(&content_root) {
        Ok(r) => r,
        Err(code) => return code,
    };
    let case_files = match resolve_case_files(path, &content_root) {
        Ok(f) => f,
        Err(code) => return code,
    };
    if case_files.is_empty() {
        eprintln!("no case files found");
        return 1;
    }
    let mut exit_code = 0;
    for case_path in &case_files {
        let result =
            verify_single_case(case_path, &reg, concurrency, timeout, cache_path, warn_only);
        if result != 0 {
            exit_code = result;
        }
    }
    let reg_result = verify_registry_thumbnails(&reg, concurrency, timeout, cache_path, warn_only);
    if reg_result != 0 {
        exit_code = reg_result;
    }
    exit_code
}
#[allow(clippy::too_many_lines)]
fn verify_single_case(
path: &str,
reg: ®istry::EntityRegistry,
concurrency: usize,
timeout: u64,
cache_path: Option<&str>,
warn_only: bool,
) -> i32 {
let content = match std::fs::read_to_string(path) {
Ok(c) => c,
Err(e) => {
eprintln!("{path}: error reading file: {e}");
return 2;
}
};
let (case, entities, rels) = match parse_full(&content, Some(reg)) {
Ok(result) => result,
Err(errors) => {
for err in &errors {
eprintln!("{path}:{err}");
}
return 1;
}
};
let mut collect_errors = Vec::new();
let urls = verifier::collect_urls(&case.sources, &entities, &rels, &mut collect_errors);
if !collect_errors.is_empty() {
for err in &collect_errors {
eprintln!("{path}:{err}");
}
return 1;
}
if urls.is_empty() {
eprintln!("{path}: no URLs to verify");
return 0;
}
let mut verify_cache = cache_path.map(|p| match cache::VerifyCache::load(p) {
Ok(c) => {
eprintln!("{path}: using cache {p}");
c
}
Err(e) => {
eprintln!("{path}: cache load warning: {e}");
cache::VerifyCache::load("/dev/null").unwrap_or_else(|_| {
cache::VerifyCache::empty()
})
}
});
let (cached_results, urls_to_check) = partition_cached(&urls, verify_cache.as_ref());
let check_count = urls_to_check.len();
let cached_count = cached_results.len();
if cached_count > 0 {
eprintln!(
"{path}: {cached_count} cached, {check_count} to check (concurrency={concurrency}, timeout={timeout}s)"
);
} else {
eprintln!(
"{path}: verifying {check_count} URLs (concurrency={concurrency}, timeout={timeout}s)"
);
}
let fresh_results = if urls_to_check.is_empty() {
Vec::new()
} else {
let rt = match tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()
{
Ok(rt) => rt,
Err(e) => {
eprintln!("{path}: failed to create async runtime: {e}");
return 2;
}
};
rt.block_on(verifier::verify_urls(urls_to_check, concurrency, timeout))
};
if let Some(ref mut vc) = verify_cache {
for check in &fresh_results {
vc.put(&check.url, check.status, check.detail.as_deref());
}
}
let mut all_results = cached_results;
all_results.extend(fresh_results);
let mut has_error = false;
for check in &all_results {
let detail = check.detail.as_deref().unwrap_or("");
match check.status {
verifier::CheckStatus::Ok => {
eprintln!(
" ok {}{}",
check.url,
if check.is_thumbnail {
" [thumbnail]"
} else {
""
}
);
}
verifier::CheckStatus::Warn => {
eprintln!(" warn {} -- {detail}", check.url);
}
verifier::CheckStatus::Error => {
has_error = true;
eprintln!(" ERROR {} -- {detail}", check.url);
}
}
}
let ok_count = all_results
.iter()
.filter(|c| c.status == verifier::CheckStatus::Ok)
.count();
let warn_count = all_results
.iter()
.filter(|c| c.status == verifier::CheckStatus::Warn)
.count();
let err_count = all_results
.iter()
.filter(|c| c.status == verifier::CheckStatus::Error)
.count();
eprintln!("{path}: {ok_count} ok, {warn_count} warn, {err_count} error");
if let Some(ref vc) = verify_cache
&& let Err(e) = vc.save()
{
eprintln!("{path}: cache save warning: {e}");
}
i32::from(has_error && !warn_only)
}
fn verify_registry_thumbnails(
reg: ®istry::EntityRegistry,
concurrency: usize,
timeout: u64,
cache_path: Option<&str>,
warn_only: bool,
) -> i32 {
let urls = verifier::collect_registry_urls(reg);
if urls.is_empty() {
return 0;
}
let label = "(registry)";
let mut verify_cache = cache_path.map(|p| match cache::VerifyCache::load(p) {
Ok(c) => c,
Err(e) => {
eprintln!("{label}: cache load warning: {e}");
cache::VerifyCache::load("/dev/null").unwrap_or_else(|_| cache::VerifyCache::empty())
}
});
let (cached_results, urls_to_check) = partition_cached(&urls, verify_cache.as_ref());
let check_count = urls_to_check.len();
let cached_count = cached_results.len();
if cached_count > 0 {
eprintln!(
"{label}: {cached_count} cached, {check_count} to check (concurrency={concurrency}, timeout={timeout}s)"
);
} else {
eprintln!(
"{label}: verifying {check_count} thumbnail URLs (concurrency={concurrency}, timeout={timeout}s)"
);
}
let fresh_results = if urls_to_check.is_empty() {
Vec::new()
} else {
let rt = match tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()
{
Ok(rt) => rt,
Err(e) => {
eprintln!("{label}: failed to create async runtime: {e}");
return 2;
}
};
rt.block_on(verifier::verify_urls(urls_to_check, concurrency, timeout))
};
if let Some(ref mut vc) = verify_cache {
for check in &fresh_results {
vc.put(&check.url, check.status, check.detail.as_deref());
}
}
let mut all_results = cached_results;
all_results.extend(fresh_results);
let mut has_error = false;
for check in &all_results {
let detail = check.detail.as_deref().unwrap_or("");
match check.status {
verifier::CheckStatus::Ok => {
eprintln!(" ok {} [thumbnail]", check.url);
}
verifier::CheckStatus::Warn => {
eprintln!(" warn {} -- {detail}", check.url);
}
verifier::CheckStatus::Error => {
has_error = true;
eprintln!(" ERROR {} -- {detail}", check.url);
}
}
}
let ok_count = all_results
.iter()
.filter(|c| c.status == verifier::CheckStatus::Ok)
.count();
let warn_count = all_results
.iter()
.filter(|c| c.status == verifier::CheckStatus::Warn)
.count();
let err_count = all_results
.iter()
.filter(|c| c.status == verifier::CheckStatus::Error)
.count();
eprintln!("{label}: {ok_count} ok, {warn_count} warn, {err_count} error");
if let Some(ref vc) = verify_cache
&& let Err(e) = vc.save()
{
eprintln!("{label}: cache save warning: {e}");
}
i32::from(has_error && !warn_only)
}
fn partition_cached(
urls: &[verifier::UrlEntry],
verify_cache: Option<&cache::VerifyCache>,
) -> (Vec<verifier::UrlCheck>, Vec<verifier::UrlEntry>) {
let Some(vc) = verify_cache else {
return (Vec::new(), urls.to_vec());
};
let mut cached = Vec::new();
let mut uncached = Vec::new();
for entry in urls {
if let Some(cache_entry) = vc.get(entry.url()) {
let status = match cache_entry.status.as_str() {
"ok" => verifier::CheckStatus::Ok,
"warn" => verifier::CheckStatus::Warn,
_ => verifier::CheckStatus::Error,
};
cached.push(verifier::UrlCheck {
url: entry.url().to_string(),
status,
detail: cache_entry.detail.clone(),
is_thumbnail: entry.is_thumbnail(),
});
} else {
uncached.push(entry.clone());
}
}
(cached, uncached)
}
/// `build` subcommand: compiles case files into JSON outputs (and
/// optionally HTML pages + sitemap), using a content-hash build cache to
/// skip unchanged cases on incremental runs.
///
/// Returns a process exit code: 0 on success, otherwise the last non-zero
/// code from a failed case build/write or HTML generation.
///
/// Fix: restores `&reg` at the `build_case_output_tracked` call site where
/// the source had been corrupted to `®` (HTML-entity mangling), which
/// prevented compilation.
fn cmd_build(
    path: Option<&str>,
    root: Option<&str>,
    output_dir: Option<&str>,
    generate_html: bool,
    base_url: &str,
    force_full: bool,
) -> i32 {
    let content_root = resolve_content_root(path, root);
    let reg = match load_registry(&content_root) {
        Ok(r) => r,
        Err(code) => return code,
    };
    let case_files = match resolve_case_files(path, &content_root) {
        Ok(f) => f,
        Err(code) => return code,
    };
    if case_files.is_empty() {
        eprintln!("no case files found");
        return 1;
    }
    // `--full` ignores the saved cache; a load failure degrades to a fresh
    // (empty) cache rather than aborting the build.
    let mut build_cache = if force_full {
        build_cache::BuildCache::empty()
    } else {
        match build_cache::BuildCache::load(&content_root) {
            Ok(c) => c,
            Err(e) => {
                eprintln!("warning: {e}, starting fresh");
                build_cache::BuildCache::empty()
            }
        }
    };
    // Hash every registry entity file and case file up front; the hashes
    // drive both change detection and cache updates.
    let mut file_hashes: std::collections::HashMap<String, String> =
        std::collections::HashMap::new();
    for entry in reg.entries() {
        if let Some(path_str) = entry.path.to_str() {
            if let Ok(hash) = build_cache::hash_file(&entry.path) {
                file_hashes.insert(path_str.to_string(), hash);
            }
        }
    }
    for case_path in &case_files {
        if let Ok(hash) = build_cache::hash_file(std::path::Path::new(case_path)) {
            file_hashes.insert(case_path.clone(), hash);
        }
    }
    let mut exit_code = 0;
    let mut written_entities = std::collections::HashSet::new();
    let mut all_outputs: Vec<output::CaseOutput> = Vec::new();
    let mut skipped = 0usize;
    for case_path in &case_files {
        // Incremental skip only applies to JSON-only builds: HTML output
        // aggregates cross-case data, so it always needs every case.
        if !force_full && !generate_html {
            if let Some(current_hash) = file_hashes.get(case_path)
                && build_cache.is_unchanged_with_hashes(case_path, current_hash, &file_hashes)
            {
                skipped += 1;
                continue;
            }
        }
        match build_case_output_tracked(case_path, &reg, &mut written_entities) {
            Ok(case_output) => {
                let write_result =
                    write_case_output(case_path, &case_output.case_id, &case_output, output_dir);
                if write_result != 0 {
                    exit_code = write_result;
                }
                // A case depends on the registry file of every entity it
                // references; record them so registry edits invalidate it.
                let deps: Vec<String> = case_output
                    .nodes
                    .iter()
                    .filter_map(|n| {
                        let name = &n.name;
                        reg.get_by_name(name)
                            .and_then(|e| e.path.to_str().map(String::from))
                    })
                    .collect();
                if let Some(hash) = file_hashes.get(case_path) {
                    build_cache.put(case_path, hash.clone(), deps);
                }
                if generate_html {
                    all_outputs.push(case_output);
                }
            }
            Err(code) => {
                exit_code = code;
            }
        }
    }
    if skipped > 0 {
        eprintln!("incremental: {skipped} case(s) unchanged, skipped");
    }
    // Track non-case (registry) files in the cache too, with no deps, so
    // stale entries can be pruned consistently.
    for (path_str, hash) in &file_hashes {
        if !case_files.contains(path_str) {
            build_cache.put(path_str, hash.clone(), vec![]);
        }
    }
    let all_files: std::collections::HashSet<String> = file_hashes.keys().cloned().collect();
    build_cache.prune(&all_files);
    if let Err(e) = build_cache.save() {
        eprintln!("warning: failed to save build cache: {e}");
    } else if !build_cache.is_empty() {
        eprintln!("build cache: {} entries saved", build_cache.len());
    }
    if generate_html {
        if let Some(dir) = output_dir {
            let html_result = generate_html_output(dir, &all_outputs, base_url);
            if html_result != 0 {
                exit_code = html_result;
            }
        } else {
            eprintln!("--html requires --output directory");
            exit_code = 1;
        }
    }
    exit_code
}
/// Renders HTML fragments for each case, each person, and each organization
/// under `<output_dir>/html/{cases,people,organizations}/`, then writes a
/// sitemap.xml covering all of them.
///
/// Returns 0 on success, 2 on any directory/render/write failure.
///
/// Fix: the people/org sitemap entries were collected by iterating
/// `HashMap`s, so sitemap.xml ordering was nondeterministic across runs;
/// the entry lists are now sorted for reproducible output.
#[allow(clippy::too_many_lines)]
fn generate_html_output(output_dir: &str, cases: &[output::CaseOutput], base_url: &str) -> i32 {
    use weave_content::html;
    let html_dir = format!("{output_dir}/html");
    let cases_dir = format!("{html_dir}/cases");
    let people_dir = format!("{html_dir}/people");
    let orgs_dir = format!("{html_dir}/organizations");
    for dir in [&cases_dir, &people_dir, &orgs_dir] {
        if let Err(e) = std::fs::create_dir_all(dir) {
            eprintln!("error creating directory {dir}: {e}");
            return 2;
        }
    }
    // Per-entity case membership (id -> list of (case_id, title)) and a
    // deduplicated node per person/organization id (first occurrence wins).
    let mut person_cases: std::collections::HashMap<String, Vec<(String, String)>> =
        std::collections::HashMap::new();
    let mut org_cases: std::collections::HashMap<String, Vec<(String, String)>> =
        std::collections::HashMap::new();
    let mut all_people: std::collections::HashMap<String, &output::NodeOutput> =
        std::collections::HashMap::new();
    let mut all_orgs: std::collections::HashMap<String, &output::NodeOutput> =
        std::collections::HashMap::new();
    for case in cases {
        match html::render_case(case) {
            Ok(fragment) => {
                let path = format!("{cases_dir}/{}.html", case.case_id);
                if let Err(e) = std::fs::write(&path, &fragment) {
                    eprintln!("error writing {path}: {e}");
                    return 2;
                }
                eprintln!("html: {path}");
            }
            Err(e) => {
                eprintln!("error rendering case {}: {e}", case.case_id);
                return 2;
            }
        }
        for node in &case.nodes {
            match node.label.as_str() {
                "person" => {
                    person_cases
                        .entry(node.id.clone())
                        .or_default()
                        .push((case.case_id.clone(), case.title.clone()));
                    all_people.entry(node.id.clone()).or_insert(node);
                }
                "organization" => {
                    org_cases
                        .entry(node.id.clone())
                        .or_default()
                        .push((case.case_id.clone(), case.title.clone()));
                    all_orgs.entry(node.id.clone()).or_insert(node);
                }
                _ => {}
            }
        }
    }
    for (id, node) in &all_people {
        let case_list = person_cases.get(id).cloned().unwrap_or_default();
        match html::render_person(node, &case_list) {
            Ok(fragment) => {
                let path = format!("{people_dir}/{id}.html");
                if let Err(e) = std::fs::write(&path, &fragment) {
                    eprintln!("error writing {path}: {e}");
                    return 2;
                }
            }
            Err(e) => {
                eprintln!("error rendering person {id}: {e}");
                return 2;
            }
        }
    }
    eprintln!("html: {} person pages", all_people.len());
    for (id, node) in &all_orgs {
        let case_list = org_cases.get(id).cloned().unwrap_or_default();
        match html::render_organization(node, &case_list) {
            Ok(fragment) => {
                let path = format!("{orgs_dir}/{id}.html");
                if let Err(e) = std::fs::write(&path, &fragment) {
                    eprintln!("error writing {path}: {e}");
                    return 2;
                }
            }
            Err(e) => {
                eprintln!("error rendering organization {id}: {e}");
                return 2;
            }
        }
    }
    eprintln!("html: {} organization pages", all_orgs.len());
    // Case order follows the input slice; people/org entries come from
    // HashMaps, so sort them for a deterministic sitemap.
    let case_entries: Vec<(String, String)> = cases
        .iter()
        .map(|c| (c.case_id.clone(), c.title.clone()))
        .collect();
    let mut people_entries: Vec<(String, String)> = all_people
        .iter()
        .map(|(id, n)| (id.clone(), n.name.clone()))
        .collect();
    people_entries.sort();
    let mut org_entries: Vec<(String, String)> = all_orgs
        .iter()
        .map(|(id, n)| (id.clone(), n.name.clone()))
        .collect();
    org_entries.sort();
    let sitemap = html::render_sitemap(&case_entries, &people_entries, &org_entries, base_url);
    let sitemap_path = format!("{html_dir}/sitemap.xml");
    if let Err(e) = std::fs::write(&sitemap_path, &sitemap) {
        eprintln!("error writing {sitemap_path}: {e}");
        return 2;
    }
    eprintln!("html: {sitemap_path}");
    0
}
/// Serializes one case's build output as pretty JSON, either into
/// `<output_dir>/<case_id>.json` or to stdout when no directory was given.
/// Returns 0 on success, 2 on serialization or write failure.
fn write_case_output(
    path: &str,
    case_id: &str,
    case_output: &output::CaseOutput,
    output_dir: Option<&str>,
) -> i32 {
    // Serialize once up front; both destinations use the same JSON.
    let json = match serde_json::to_string_pretty(case_output) {
        Ok(json) => json,
        Err(e) => {
            eprintln!("{path}: JSON serialization error: {e}");
            return 2;
        }
    };
    if let Some(dir) = output_dir {
        let out_path = format!("{dir}/{case_id}.json");
        if let Err(e) = std::fs::write(&out_path, json) {
            eprintln!("{out_path}: error writing file: {e}");
            return 2;
        }
        eprintln!("{path} -> {out_path}");
    } else {
        println!("{json}");
    }
    0
}
#[cfg(test)]
mod tests {
    use super::*;

    // Fixture: a complete case file with frontmatter, events, explicit
    // relationships, and a timeline section.
    const FULL_CASE: &str = r"---
id: bonnick-v-arsenal
sources:
- https://www.theguardian.com/football/2025/feb/03/bonnick
- https://novaramedia.com/2025/02/04/bonnick
---
# Bonnick v Arsenal FC
Kit manager dismissed over social media posts about Israel-Gaza.
## Events
### Bonnick dismissal
- occurred_at: 2024-12-24
- event_type: dismissal
- description: Arsenal dismisses Bonnick over social media posts
regarding Israel-Gaza conflict.
### FA investigation finding
- occurred_at: 2024
- event_type: investigation_closed
- description: FA investigates and finds the posts did not breach
FA rules. Matter closed by FA.
### Employment tribunal filing
- occurred_at: 2025-02-03
- event_type: custom:Employment Tribunal
- description: Bonnick files employment tribunal claim against Arsenal.
## Relationships
- Bonnick dismissal -> FA investigation finding: preceded_by
- FA investigation finding -> Employment tribunal filing: preceded_by
- Bonnick dismissal -> Employment tribunal filing: references
- source: https://novaramedia.com/2025/02/04/bonnick
## Timeline
Bonnick dismissal -> FA investigation finding -> Employment tribunal filing
";

    #[test]
    fn parse_full_case_file() {
        let (case, entities, rels) = parse_full(FULL_CASE, None).unwrap();
        assert_eq!(case.id, "bonnick-v-arsenal");
        assert_eq!(case.title, "Bonnick v Arsenal FC");
        assert!(case.summary.contains("Kit manager dismissed"));
        assert_eq!(case.sources.len(), 2);
        assert_eq!(entities.len(), 3);
        assert!(entities.iter().all(|e| e.label == entity::Label::Event));
        let dismissal = entities
            .iter()
            .find(|e| e.name == "Bonnick dismissal")
            .unwrap();
        assert_eq!(dismissal.label, entity::Label::Event);
        assert_eq!(rels.len(), 5);
        // Timeline-derived relationships have no attached source URLs.
        let timeline_rels: Vec<_> = rels
            .iter()
            .filter(|r| r.rel_type == "preceded_by" && r.source_urls.is_empty())
            .collect();
        assert_eq!(timeline_rels.len(), 2);
        assert_eq!(timeline_rels[0].source_name, "Bonnick dismissal");
        assert_eq!(timeline_rels[0].target_name, "FA investigation finding");
        assert_eq!(timeline_rels[1].source_name, "FA investigation finding");
        assert_eq!(timeline_rels[1].target_name, "Employment tribunal filing");
    }

    #[test]
    fn parse_full_minimal_case() {
        let input = r"---
id: minimal-test
sources:
- https://example.com/source
---
# Minimal Test Case
A simple test.
## Events
### Something happened
- occurred_at: 2025-01-01
- event_type: conviction
";
        let (case, entities, rels) = parse_full(input, None).unwrap();
        assert_eq!(case.id, "minimal-test");
        assert_eq!(case.title, "Minimal Test Case");
        assert_eq!(entities.len(), 1);
        assert_eq!(entities[0].name, "Something happened");
        assert!(rels.is_empty());
    }

    #[test]
    fn json_snapshot_full_case() {
        let (case, entities, rels) = parse_full(FULL_CASE, None).unwrap();
        let build_result = output::build_output(
            &case.id,
            &case.title,
            &case.summary,
            &case.tags,
            &case.sources,
            &entities,
            &rels,
            &[],
        )
        .unwrap();
        let json = serde_json::to_string_pretty(&build_result.output).unwrap();
        assert!(json.contains("\"case_id\": \"bonnick-v-arsenal\""));
        assert!(json.contains("\"title\": \"Bonnick v Arsenal FC\""));
        assert!(json.contains("\"label\": \"event\""));
        assert!(json.contains("\"name\": \"Bonnick dismissal\""));
        assert!(json.contains("\"name\": \"FA investigation finding\""));
        assert!(json.contains("\"event_type\": \"dismissal\""));
        assert!(json.contains("\"event_type\": \"investigation_closed\""));
        assert!(json.contains("\"type\": \"preceded_by\""));
        assert!(json.contains("\"type\": \"references\""));
        let output: serde_json::Value = serde_json::from_str(&json).unwrap();
        let nodes = output["nodes"].as_array().unwrap();
        let rels_arr = output["relationships"].as_array().unwrap();
        for node in nodes {
            let id = node["id"].as_str().unwrap();
            assert!(!id.is_empty());
            assert!(id.len() >= 20);
        }
        for rel in rels_arr {
            let id = rel["id"].as_str().unwrap();
            assert!(!id.is_empty());
        }
        // Every relationship endpoint must resolve to an emitted node.
        let node_ids: Vec<&str> = nodes.iter().map(|n| n["id"].as_str().unwrap()).collect();
        for rel in rels_arr {
            let source_id = rel["source_id"].as_str().unwrap();
            let target_id = rel["target_id"].as_str().unwrap();
            assert!(
                node_ids.contains(&source_id),
                "source_id {source_id} not found in nodes"
            );
            assert!(
                node_ids.contains(&target_id),
                "target_id {target_id} not found in nodes"
            );
        }
    }

    #[test]
    fn json_snapshot_omits_empty_fields() {
        let input = r"---
id: sparse
sources:
- https://example.com/src
---
# Sparse Case
Summary.
## Events
### Something
- occurred_at: 2025-01-01
";
        let (case, entities, rels) = parse_full(input, None).unwrap();
        let build_result = output::build_output(
            &case.id,
            &case.title,
            &case.summary,
            &case.tags,
            &case.sources,
            &entities,
            &rels,
            &[],
        )
        .unwrap();
        let json = serde_json::to_string_pretty(&build_result.output).unwrap();
        assert!(!json.contains("\"qualifier\""));
        assert!(!json.contains("\"description\""));
        assert!(!json.contains("\"thumbnail\""));
        assert!(!json.contains("\"aliases\""));
        assert!(!json.contains("\"urls\""));
        assert!(json.contains("\"occurred_at\": \"2025-01-01\""));
    }

    #[test]
    fn cross_file_resolution_with_registry() {
        use std::path::PathBuf;
        use weave_content::entity::Entity;
        let entries = vec![registry::RegistryEntry {
            entity: Entity {
                name: "Mark Bonnick".to_string(),
                label: entity::Label::Person,
                fields: vec![(
                    "nationality".to_string(),
                    entity::FieldValue::Single("British".to_string()),
                )],
                id: Some("01JXYZ123456789ABCDEFGHIJK".to_string()),
                line: 1,
                tags: Vec::new(),
            },
            path: PathBuf::from("people/mark-bonnick.md"),
            tags: Vec::new(),
        }];
        let reg = registry::EntityRegistry::from_entries(entries).unwrap();
        let input = r"---
id: test-cross-ref
sources:
- https://example.com/src
---
# Cross Reference Test
Summary.
## Events
### Dismissal
- occurred_at: 2024-12-24
- event_type: dismissal
## Relationships
- Mark Bonnick -> Dismissal: associate_of
";
        // Without a registry the cross-file name must fail to resolve.
        let err = parse_full(input, None).unwrap_err();
        assert!(err.iter().any(|e| e.message.contains("Mark Bonnick")));
        // With the registry it resolves. (Fix: `Some(&reg)` had been
        // corrupted to `Some(®)` by HTML-entity mangling.)
        let (case, entities, rels) = parse_full(input, Some(&reg)).unwrap();
        assert_eq!(case.id, "test-cross-ref");
        assert_eq!(entities.len(), 1);
        assert_eq!(rels.len(), 1);
        assert_eq!(rels[0].source_name, "Mark Bonnick");
        assert_eq!(rels[0].target_name, "Dismissal");
    }
}