use std::collections::HashSet;
use std::path::Path;
use clap::{Parser, Subcommand};
use rayon::prelude::*;
use weave_content::build_cache;
use weave_content::cache;
use weave_content::output;
use weave_content::registry;
use weave_content::staleness;
use weave_content::tags;
use weave_content::verifier;
use weave_content::{
build_case_output_tracked, load_registry, load_tag_registry, parse_full, resolve_case_files,
resolve_content_root,
};
#[cfg(test)]
use weave_content::entity;
// Top-level CLI parser.
// NOTE: the bare `about` in `#[command]` makes clap pull the about text
// from a `///` doc comment (or Cargo.toml description), so comments on
// this struct are deliberately plain `//` to avoid changing --help.
#[derive(Parser)]
#[command(name = "weave-content", version, about)]
struct Cli {
    // The selected subcommand; see `Command` for the available ones.
    #[command(subcommand)]
    command: Command,
}
// CLI subcommands. clap derives flag names from the field names and
// help text from `#[arg]` attributes; comments here are plain `//` so
// they do not leak into the generated --help output.
#[derive(Subcommand)]
enum Command {
    // Structural validation: case files, registry, tags, redirects.
    Validate {
        // Positional: a case file or directory (optional).
        path: Option<String>,
        // Content root override.
        #[arg(long)]
        root: Option<String>,
    },
    // Live URL verification with an optional on-disk result cache.
    Verify {
        path: Option<String>,
        #[arg(long)]
        root: Option<String>,
        // Number of parallel URL checks.
        #[arg(long, default_value_t = 16)]
        concurrency: usize,
        // Per-request timeout in seconds.
        #[arg(long, default_value_t = 15)]
        timeout: u64,
        // Path to the verification cache file.
        #[arg(long)]
        cache: Option<String>,
        // Report failures but still exit 0.
        #[arg(long)]
        warn_only: bool,
    },
    // Compile cases to JSON (and optionally HTML) output.
    Build {
        path: Option<String>,
        #[arg(long)]
        root: Option<String>,
        // Output directory; JSON goes to stdout when omitted.
        #[arg(short, long)]
        output: Option<String>,
        // Also generate HTML pages (requires --output).
        #[arg(long)]
        html: bool,
        #[arg(long, default_value = "https://redberrythread.org")]
        base_url: String,
        #[arg(long)]
        thumbnail_base_url: Option<String>,
        // Ignore the incremental cache and rebuild everything.
        #[arg(long)]
        full: bool,
    },
    // Flag cases whose latest activity exceeds per-phase age thresholds.
    CheckStaleness {
        path: Option<String>,
        #[arg(long)]
        root: Option<String>,
        // Months before an open investigation counts as stale.
        #[arg(long, default_value_t = 6)]
        investigation_months: u32,
        // Months for the trial phase.
        #[arg(long, default_value_t = 12)]
        trial_months: u32,
        // Months for the appeal phase.
        #[arg(long, default_value_t = 12)]
        appeal_months: u32,
    },
}
/// Entry point: parse the CLI, dispatch to the matching subcommand
/// handler, and terminate the process with the handler's exit code.
fn main() {
    let cli = Cli::parse();
    // Matching by value (fields are owned once `cli.command` is moved)
    // keeps the arms free of `ref` noise.
    let code = match cli.command {
        Command::Validate { path, root } => cmd_validate(path.as_deref(), root.as_deref()),
        Command::Verify {
            path,
            root,
            concurrency,
            timeout,
            cache,
            warn_only,
        } => cmd_verify(
            path.as_deref(),
            root.as_deref(),
            concurrency,
            timeout,
            cache.as_deref(),
            warn_only,
        ),
        Command::Build {
            path,
            root,
            output,
            html,
            base_url,
            thumbnail_base_url,
            full,
        } => cmd_build(
            path.as_deref(),
            root.as_deref(),
            output.as_deref(),
            html,
            &base_url,
            thumbnail_base_url.as_deref(),
            full,
        ),
        Command::CheckStaleness {
            path,
            root,
            investigation_months,
            trial_months,
            appeal_months,
        } => cmd_check_staleness(
            path.as_deref(),
            root.as_deref(),
            investigation_months,
            trial_months,
            appeal_months,
        ),
    };
    std::process::exit(code);
}
/// `validate` subcommand: structural validation of redirects, the
/// entity registry, the tag registry, and every case file.
/// Returns a process exit code (0 = clean, 1 = validation errors,
/// 2 = I/O failure surfaced by a per-case result).
fn cmd_validate(path: Option<&str>, root: Option<&str>) -> i32 {
    let content_root = resolve_content_root(path, root);
    // Redirect collisions abort early: a file still sitting at an old
    // redirected path would shadow the redirect entry.
    let redirect_slugs = load_redirect_slugs(&content_root);
    if !redirect_slugs.is_empty() {
        eprintln!("redirects: {} entries loaded", redirect_slugs.len());
        let collision_count = check_redirect_collisions(&redirect_slugs, &content_root);
        if collision_count > 0 {
            eprintln!(
                "error: {collision_count} file(s) at old redirected paths — move them or remove the redirect entry"
            );
            return 1;
        }
    }
    let reg = match load_registry(&content_root) {
        Ok(r) => r,
        Err(code) => return code,
    };
    let tag_reg = match load_tag_registry(&content_root) {
        Ok(r) => r,
        Err(code) => return code,
    };
    let case_files = match resolve_case_files(path, &content_root) {
        Ok(f) => f,
        Err(code) => return code,
    };
    if case_files.is_empty() {
        eprintln!("no case files found");
        return 1;
    }
    if !reg.is_empty() {
        eprintln!("registry: {} entities loaded", reg.len());
    }
    if !tag_reg.is_empty() {
        eprintln!(
            "tags: {} tags loaded across {} categories",
            tag_reg.len(),
            tag_reg.category_slugs().len()
        );
    }
    // Validate the tags attached to registry entities (depth limit 2).
    let mut entity_tag_errors = false;
    for entry in reg.entries() {
        let tag_errors = tag_reg.validate_tags(&entry.tags, 2);
        for err in &tag_errors {
            eprintln!("{}:{err}", entry.path.display());
        }
        if !tag_errors.is_empty() {
            entity_tag_errors = true;
        }
    }
    // Validate case files in parallel; each result carries its exit
    // code plus the event names / relationship ids needed for the
    // cross-file duplicate checks below.
    let results: Vec<ValidateResult> = case_files
        .par_iter()
        .map(|case_path| validate_single_case(case_path, &reg, &tag_reg))
        .collect();
    // Entity tag errors alone yield exit code 1; per-case codes
    // (1 or 2) take precedence as they are encountered.
    let mut exit_code = i32::from(entity_tag_errors);
    let mut all_events: Vec<(String, String)> = Vec::new();
    let mut all_rel_ids: Vec<(String, String, usize)> = Vec::new();
    for result in results {
        if result.exit_code != 0 {
            exit_code = result.exit_code;
        }
        all_events.extend(result.events);
        all_rel_ids.extend(result.rel_ids);
    }
    if let Some(code) = check_duplicate_event_names(&all_events) {
        exit_code = code;
    }
    if let Some(code) = check_duplicate_rel_ids(&all_rel_ids) {
        exit_code = code;
    }
    // Warning only — inconsistent qualifier casing never fails the run.
    check_qualifier_consistency(&reg);
    exit_code
}
/// Per-file outcome of `validate_single_case`, aggregated by
/// `cmd_validate` across all case files.
struct ValidateResult {
    // 0 = valid, 1 = validation errors, 2 = I/O error.
    exit_code: i32,
    // (event name, case file path) pairs for the global duplicate-name check.
    events: Vec<(String, String)>,
    // (relationship id, case file path, line) for the duplicate-id check.
    rel_ids: Vec<(String, String, usize)>,
}
fn validate_single_case(
path: &str,
reg: ®istry::EntityRegistry,
tag_reg: &tags::TagRegistry,
) -> ValidateResult {
let content = match std::fs::read_to_string(path) {
Ok(c) => c,
Err(e) => {
eprintln!("{path}: error reading file: {e}");
return ValidateResult {
exit_code: 2,
events: Vec::new(),
rel_ids: Vec::new(),
};
}
};
match parse_full(&content, Some(reg)) {
Ok((case, entities, rels)) => {
eprintln!(
"{path}: ok -- {id}: {title} ({ent} entities, {rel} relationships, {src} sources)",
id = case.id.as_deref().unwrap_or("(no id)"),
title = case.title,
ent = entities.len(),
rel = rels.len(),
src = case.sources.len(),
);
if !case.summary.is_empty() {
eprintln!(
" summary: {}...",
&case.summary[..case.summary.len().min(80)]
);
}
for e in &entities {
let id_display = e.id.as_deref().unwrap_or("(no id)");
eprintln!(
" line {}: {id_display} {} ({}, {} fields)",
e.line,
e.name,
e.label,
e.fields.len()
);
}
let events: Vec<(String, String)> = entities
.iter()
.filter(|e| e.label == weave_content::entity::Label::Event)
.map(|e| (e.name.clone(), path.to_string()))
.collect();
for r in &rels {
let id_display = r.id.as_deref().unwrap_or("(no id)");
eprintln!(
" line {}: {id_display} {} -> {}: {}",
r.line, r.source_name, r.target_name, r.rel_type,
);
}
let mut exit_code = 0;
let tag_errors = tag_reg.validate_tags(&case.tags, 2);
for err in &tag_errors {
eprintln!("{path}:{err}");
}
if !tag_errors.is_empty() {
exit_code = 1;
}
let rel_ids: Vec<(String, String, usize)> = rels
.iter()
.filter_map(|r| {
r.id.as_ref()
.map(|id| (id.clone(), path.to_string(), r.line))
})
.collect();
ValidateResult {
exit_code,
events,
rel_ids,
}
}
Err(errors) => {
for err in &errors {
eprintln!("{path}:{err}");
}
ValidateResult {
exit_code: 1,
events: Vec::new(),
rel_ids: Vec::new(),
}
}
}
}
/// Report event names that appear in more than one case file.
///
/// Prints one error line per duplicate occurrence (after the first)
/// and returns `Some(1)` when any duplicate exists, `None` otherwise.
fn check_duplicate_event_names(all_events: &[(String, String)]) -> Option<i32> {
    let mut first_seen: std::collections::HashMap<&str, &str> = std::collections::HashMap::new();
    let mut duplicate_found = false;
    for (name, path) in all_events {
        match first_seen.get(name.as_str()) {
            Some(&first_path) => {
                eprintln!(
                    "error: duplicate event name {name:?} in {path} (first defined in {first_path})"
                );
                duplicate_found = true;
            }
            None => {
                first_seen.insert(name, path);
            }
        }
    }
    duplicate_found.then_some(1)
}
/// Report relationship ids that are defined more than once across all
/// case files. Returns `Some(1)` when any duplicate exists, else `None`.
fn check_duplicate_rel_ids(all_rel_ids: &[(String, String, usize)]) -> Option<i32> {
    let mut first_seen: std::collections::HashMap<&str, (&str, usize)> =
        std::collections::HashMap::new();
    let mut duplicate_found = false;
    for (id, path, line) in all_rel_ids {
        if let Some(&(first_path, first_line)) = first_seen.get(id.as_str()) {
            eprintln!(
                "error: duplicate relationship id {id:?} at {path}:{line} (first defined at {first_path}:{first_line})"
            );
            duplicate_found = true;
            continue;
        }
        first_seen.insert(id, (path, *line));
    }
    duplicate_found.then_some(1)
}
fn check_qualifier_consistency(reg: ®istry::EntityRegistry) {
use weave_content::entity::FieldValue;
let mut by_lower: std::collections::HashMap<String, Vec<(String, String)>> =
std::collections::HashMap::new();
for entry in reg.entries() {
let qualifier = entry
.entity
.fields
.iter()
.find(|(k, _)| k == "qualifier")
.and_then(|(_, v)| match v {
FieldValue::Single(s) => Some(s.as_str()),
FieldValue::List(_) => None,
});
if let Some(q) = qualifier {
by_lower
.entry(q.to_lowercase())
.or_default()
.push((q.to_string(), entry.path.display().to_string()));
}
}
for occurrences in by_lower.values() {
let first = &occurrences[0].0;
let inconsistent: Vec<_> = occurrences.iter().filter(|(q, _)| q != first).collect();
if !inconsistent.is_empty() {
eprintln!(
"warning: inconsistent qualifier casing for {:?}:",
occurrences[0].0
);
for (q, path) in occurrences {
eprintln!(" {path}: {q:?}");
}
}
}
}
/// Collect every `- from:` slug from `redirects.yaml` under
/// `content_root`. A missing or unreadable file yields an empty set.
/// (Deliberately a line-prefix scan, not a full YAML parse.)
fn load_redirect_slugs(content_root: &Path) -> HashSet<String> {
    let redirects_file = content_root.join("redirects.yaml");
    let Ok(text) = std::fs::read_to_string(&redirects_file) else {
        return HashSet::new();
    };
    text.lines()
        .filter_map(|line| line.strip_prefix(" - from: "))
        .map(|slug| slug.trim().to_string())
        .collect()
}
/// Count — and report on stderr — markdown files that still exist at
/// paths listed as redirect sources. Each hit is a content error.
fn check_redirect_collisions(
    redirect_slugs: &HashSet<String>,
    content_root: &Path,
) -> usize {
    if redirect_slugs.is_empty() {
        return 0;
    }
    let root_str = content_root.to_string_lossy();
    let mut collisions = 0;
    for slug in redirect_slugs {
        let candidate = content_root.join(format!("{slug}.md"));
        if !candidate.exists() {
            continue;
        }
        eprintln!(
            "{root_str}/{slug}.md: error: file at old redirected path (see redirects.yaml)"
        );
        collisions += 1;
    }
    collisions
}
/// `check-staleness` subcommand: flag cases that have seen no recent
/// activity relative to the per-phase month thresholds. Only findings
/// with `Severity::Error` make the command exit non-zero.
fn cmd_check_staleness(
    path: Option<&str>,
    root: Option<&str>,
    investigation_months: u32,
    trial_months: u32,
    appeal_months: u32,
) -> i32 {
    let content_root = resolve_content_root(path, root);
    let reg = match load_registry(&content_root) {
        Ok(r) => r,
        Err(code) => return code,
    };
    let case_files = match resolve_case_files(path, &content_root) {
        Ok(f) => f,
        Err(code) => return code,
    };
    if case_files.is_empty() {
        eprintln!("no case files found");
        return 1;
    }
    let thresholds = staleness::Thresholds {
        investigation_months,
        trial_months,
        appeal_months,
    };
    // (year, month, day) in UTC, computed without a calendar crate.
    let now = chrono_today();
    let mut all_findings: Vec<(String, staleness::Finding)> = Vec::new();
    for case_path in &case_files {
        // Unreadable or unparseable files are reported but do not stop
        // the sweep (and do not affect the exit code).
        let content = match std::fs::read_to_string(case_path) {
            Ok(c) => c,
            Err(e) => {
                eprintln!("{case_path}: error reading file: {e}");
                continue;
            }
        };
        let (case, entities, _rels) = match parse_full(&content, Some(&reg)) {
            Ok(result) => result,
            Err(errors) => {
                for err in &errors {
                    eprintln!("{case_path}:{err}");
                }
                continue;
            }
        };
        let findings = staleness::check_case(&case, &entities, &thresholds, now);
        for finding in findings {
            all_findings.push((case_path.clone(), finding));
        }
    }
    // Group output by severity; the stable sort preserves file order
    // within each severity level.
    all_findings.sort_by_key(|a| a.1.severity);
    let mut errors = 0u32;
    let mut warnings = 0u32;
    let mut infos = 0u32;
    for (path, finding) in &all_findings {
        eprintln!("{}: {path}: {}", finding.severity, finding.message);
        match finding.severity {
            staleness::Severity::Error => errors += 1,
            staleness::Severity::Warning => warnings += 1,
            staleness::Severity::Info => infos += 1,
        }
    }
    eprintln!(
        "staleness: {errors} error(s), {warnings} warning(s), {infos} info(s) across {} case(s)",
        case_files.len()
    );
    i32::from(errors > 0)
}
/// Current UTC date as (year, month, day), derived from the system
/// clock without any calendar dependency.
fn chrono_today() -> (i32, u32, u32) {
    // A clock set before the epoch degrades to day 0 (1970-01-01)
    // instead of panicking.
    let elapsed = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap_or_default();
    days_to_date(elapsed.as_secs() / 86400)
}
/// Convert days since the Unix epoch into a (year, month, day) civil
/// date. This is Howard Hinnant's `civil_from_days` algorithm, which
/// works on a calendar shifted to start on March 1st so the leap day
/// falls at the end of the internal year.
#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss, clippy::cast_possible_wrap)]
fn days_to_date(days: u64) -> (i32, u32, u32) {
    // 719_468 days separate 0000-03-01 from 1970-01-01; clamping to
    // u32::MAX keeps absurdly large inputs from wrapping.
    let shifted = i64::from(u32::try_from(days).unwrap_or(u32::MAX)) + 719_468;
    let era = if shifted >= 0 { shifted } else { shifted - 146_096 } / 146_097;
    let day_of_era = (shifted - era * 146_097) as u32;
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36524 - day_of_era / 146_096) / 365;
    let year = (i64::from(year_of_era) + era * 400) as i32;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);
    let month_index = (5 * day_of_year + 2) / 153;
    let day = day_of_year - (153 * month_index + 2) / 5 + 1;
    let month = if month_index < 10 { month_index + 3 } else { month_index - 9 };
    // Months Jan/Feb belong to the next civil year in the shifted calendar.
    let civil_year = if month <= 2 { year + 1 } else { year };
    (civil_year, month, day)
}
/// `verify` subcommand: check every URL referenced by the case files,
/// then the registry thumbnail URLs, and return a process exit code.
fn cmd_verify(
    path: Option<&str>,
    root: Option<&str>,
    concurrency: usize,
    timeout: u64,
    cache_path: Option<&str>,
    warn_only: bool,
) -> i32 {
    let content_root = resolve_content_root(path, root);
    let reg = match load_registry(&content_root) {
        Ok(r) => r,
        Err(code) => return code,
    };
    let case_files = match resolve_case_files(path, &content_root) {
        Ok(f) => f,
        Err(code) => return code,
    };
    if case_files.is_empty() {
        eprintln!("no case files found");
        return 1;
    }
    // Keep the last non-zero status rather than stopping at the first
    // failure, so every file gets reported in one run.
    let mut exit_code = 0;
    for case_path in &case_files {
        match verify_single_case(case_path, &reg, concurrency, timeout, cache_path, warn_only) {
            0 => {}
            code => exit_code = code,
        }
    }
    match verify_registry_thumbnails(&reg, concurrency, timeout, cache_path, warn_only) {
        0 => {}
        code => exit_code = code,
    }
    exit_code
}
fn verify_single_case(
path: &str,
reg: ®istry::EntityRegistry,
concurrency: usize,
timeout: u64,
cache_path: Option<&str>,
warn_only: bool,
) -> i32 {
let content = match std::fs::read_to_string(path) {
Ok(c) => c,
Err(e) => {
eprintln!("{path}: error reading file: {e}");
return 2;
}
};
let (case, entities, rels) = match parse_full(&content, Some(reg)) {
Ok(result) => result,
Err(errors) => {
for err in &errors {
eprintln!("{path}:{err}");
}
return 1;
}
};
let mut collect_errors = Vec::new();
let urls = verifier::collect_urls(&case.sources, &entities, &rels, &mut collect_errors);
if !collect_errors.is_empty() {
for err in &collect_errors {
eprintln!("{path}:{err}");
}
return 1;
}
if urls.is_empty() {
eprintln!("{path}: no URLs to verify");
return 0;
}
run_url_verification(path, &urls, concurrency, timeout, cache_path, warn_only)
}
fn verify_registry_thumbnails(
reg: ®istry::EntityRegistry,
concurrency: usize,
timeout: u64,
cache_path: Option<&str>,
warn_only: bool,
) -> i32 {
let urls = verifier::collect_registry_urls(reg);
if urls.is_empty() {
return 0;
}
run_url_verification(
"(registry)",
&urls,
concurrency,
timeout,
cache_path,
warn_only,
)
}
/// Verify a batch of URLs for one label (a case path or "(registry)"),
/// consulting and updating the optional on-disk cache.
///
/// Returns 0 on success, 1 when any URL errored (suppressed by
/// `warn_only`), and 2 when the async runtime cannot be created.
fn run_url_verification(
    label: &str,
    urls: &[verifier::UrlEntry],
    concurrency: usize,
    timeout: u64,
    cache_path: Option<&str>,
    warn_only: bool,
) -> i32 {
    let mut verify_cache = load_verify_cache(label, cache_path);
    // Answer whatever we can from the cache; only the remainder hits
    // the network.
    let (cached_results, urls_to_check) = partition_cached(urls, verify_cache.as_ref());
    let check_count = urls_to_check.len();
    let cached_count = cached_results.len();
    if cached_count > 0 {
        eprintln!(
            "{label}: {cached_count} cached, {check_count} to check \
            (concurrency={concurrency}, timeout={timeout}s)"
        );
    } else {
        eprintln!(
            "{label}: verifying {check_count} URLs \
            (concurrency={concurrency}, timeout={timeout}s)"
        );
    }
    let fresh_results = if urls_to_check.is_empty() {
        Vec::new()
    } else {
        // A current-thread runtime suffices: request concurrency comes
        // from the verifier's async fan-out, not from worker threads.
        let rt = match tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
        {
            Ok(rt) => rt,
            Err(e) => {
                eprintln!("{label}: failed to create async runtime: {e}");
                return 2;
            }
        };
        rt.block_on(verifier::verify_urls(urls_to_check, concurrency, timeout))
    };
    // Record fresh outcomes so the next run can skip them.
    if let Some(ref mut vc) = verify_cache {
        for check in &fresh_results {
            vc.put(&check.url, check.status, check.detail.as_deref());
        }
    }
    let mut all_results = cached_results;
    all_results.extend(fresh_results);
    let has_error = print_verification_results(label, &all_results);
    // A cache save failure is only a warning: verification already ran.
    if let Some(ref vc) = verify_cache
        && let Err(e) = vc.save()
    {
        eprintln!("{label}: cache save warning: {e}");
    }
    i32::from(has_error && !warn_only)
}
/// Open the URL verification cache when `--cache` was given.
///
/// Returns `None` when no cache path was supplied; on a load failure
/// the run continues with an empty cache instead of aborting.
fn load_verify_cache(label: &str, cache_path: Option<&str>) -> Option<cache::VerifyCache> {
    cache_path.map(|p| match cache::VerifyCache::load(p) {
        Ok(c) => {
            eprintln!("{label}: using cache {p}");
            c
        }
        Err(e) => {
            eprintln!("{label}: cache load warning: {e}");
            // NOTE(review): loading "/dev/null" to obtain an empty cache
            // is Unix-only (on Windows it falls through to `empty()`);
            // presumably `VerifyCache::empty()` alone would suffice —
            // confirm what path `save()` targets for an `empty()` cache
            // before simplifying.
            cache::VerifyCache::load("/dev/null").unwrap_or_else(|_| cache::VerifyCache::empty())
        }
    })
}
/// Print per-URL verification results followed by a summary line to
/// stderr. Returns `true` when at least one URL failed with an error.
fn print_verification_results(label: &str, results: &[verifier::UrlCheck]) -> bool {
    // Count in the same pass that prints, instead of re-scanning the
    // slice once per status.
    let mut ok_count = 0usize;
    let mut warn_count = 0usize;
    let mut err_count = 0usize;
    for check in results {
        let detail = check.detail.as_deref().unwrap_or("");
        match check.status {
            verifier::CheckStatus::Ok => {
                ok_count += 1;
                let suffix = if check.is_thumbnail { " [thumbnail]" } else { "" };
                eprintln!(" ok {}{}", check.url, suffix);
            }
            verifier::CheckStatus::Warn => {
                warn_count += 1;
                eprintln!(" warn {} -- {detail}", check.url);
            }
            verifier::CheckStatus::Error => {
                err_count += 1;
                eprintln!(" ERROR {} -- {detail}", check.url);
            }
        }
    }
    eprintln!("{label}: {ok_count} ok, {warn_count} warn, {err_count} error");
    err_count > 0
}
/// Split `urls` into results answerable from the verify cache and
/// entries that still need a live check. Without a cache, everything
/// needs checking.
fn partition_cached(
    urls: &[verifier::UrlEntry],
    verify_cache: Option<&cache::VerifyCache>,
) -> (Vec<verifier::UrlCheck>, Vec<verifier::UrlEntry>) {
    let Some(vc) = verify_cache else {
        return (Vec::new(), urls.to_vec());
    };
    let mut from_cache = Vec::new();
    let mut needs_check = Vec::new();
    for entry in urls {
        match vc.get(entry.url()) {
            Some(cache_entry) => {
                // The cache stores status as a string; anything
                // unrecognised is treated as an error.
                let status = match cache_entry.status.as_str() {
                    "ok" => verifier::CheckStatus::Ok,
                    "warn" => verifier::CheckStatus::Warn,
                    _ => verifier::CheckStatus::Error,
                };
                from_cache.push(verifier::UrlCheck {
                    url: entry.url().to_string(),
                    status,
                    detail: cache_entry.detail.clone(),
                    is_thumbnail: entry.is_thumbnail(),
                });
            }
            None => needs_check.push(entry.clone()),
        }
    }
    (from_cache, needs_check)
}
/// `build` subcommand: compile case files to JSON (and optionally
/// HTML), using an incremental build cache keyed by content hashes.
fn cmd_build(
    path: Option<&str>,
    root: Option<&str>,
    output_dir: Option<&str>,
    generate_html: bool,
    base_url: &str,
    thumbnail_base_url: Option<&str>,
    force_full: bool,
) -> i32 {
    let content_root = resolve_content_root(path, root);
    let reg = match load_registry(&content_root) {
        Ok(r) => r,
        Err(code) => return code,
    };
    let case_files = match resolve_case_files(path, &content_root) {
        Ok(f) => f,
        Err(code) => return code,
    };
    if case_files.is_empty() {
        eprintln!("no case files found");
        return 1;
    }
    let mut build_cache = load_build_cache(&content_root, force_full);
    // Content hashes for every registry and case file, used for the
    // incremental skip decision and cache bookkeeping.
    let file_hashes = hash_content_files(&reg, &case_files);
    // Per-case index built up front so cross-case references resolve
    // regardless of build order. NOTE(review): the (String, String)
    // values appear to be id/slug pairs — confirm in build_case_index.
    let case_nulid_map = match weave_content::build_case_index(&case_files, &content_root) {
        Ok(m) => m,
        Err(code) => return code,
    };
    let (exit_code, all_outputs) = build_cases(
        &case_files,
        &reg,
        &case_nulid_map,
        &file_hashes,
        &mut build_cache,
        output_dir,
        generate_html,
        force_full,
    );
    // Persist the cache even when some cases failed, so successful
    // builds are not repeated next run.
    finalize_build_cache(&mut build_cache, &case_files, &file_hashes);
    let mut exit_code = exit_code;
    if generate_html {
        if let Some(dir) = output_dir {
            let html_result = weave_content::generate_html_output(
                dir,
                &all_outputs,
                base_url,
                thumbnail_base_url,
            );
            if html_result != 0 {
                exit_code = html_result;
            }
        } else {
            // HTML pages are only written to disk, never to stdout.
            eprintln!("--html requires --output directory");
            exit_code = 1;
        }
    }
    exit_code
}
/// Load the incremental build cache, or start empty when a full build
/// was requested or the existing cache cannot be read.
fn load_build_cache(content_root: &std::path::Path, force_full: bool) -> build_cache::BuildCache {
    if force_full {
        return build_cache::BuildCache::empty();
    }
    build_cache::BuildCache::load(content_root).unwrap_or_else(|e| {
        // A corrupt/missing cache only costs a rebuild, never the run.
        eprintln!("warning: {e}, starting fresh");
        build_cache::BuildCache::empty()
    })
}
fn hash_content_files(
reg: ®istry::EntityRegistry,
case_files: &[String],
) -> std::collections::HashMap<String, String> {
let mut file_hashes = std::collections::HashMap::new();
for entry in reg.entries() {
if let Some(path_str) = entry.path.to_str()
&& let Ok(hash) = build_cache::hash_file(&entry.path)
{
file_hashes.insert(path_str.to_string(), hash);
}
}
for case_path in case_files {
if let Ok(hash) = build_cache::hash_file(std::path::Path::new(case_path)) {
file_hashes.insert(case_path.clone(), hash);
}
}
file_hashes
}
/// Build every case file into a `CaseOutput`, honouring the
/// incremental cache: a case is skipped when its own hash and all of
/// its dependency hashes are unchanged — unless `force_full` is set or
/// HTML is being collected (HTML needs every output in memory).
/// Returns the last non-zero exit code plus the outputs collected for
/// HTML generation.
fn build_cases(
    case_files: &[String],
    reg: &registry::EntityRegistry,
    case_nulid_map: &std::collections::HashMap<String, (String, String)>,
    file_hashes: &std::collections::HashMap<String, String>,
    build_cache: &mut build_cache::BuildCache,
    output_dir: Option<&str>,
    collect_html: bool,
    force_full: bool,
) -> (i32, Vec<output::CaseOutput>) {
    let mut exit_code = 0;
    // Shared across cases so each entity is tracked once.
    // NOTE(review): exact semantics live in build_case_output_tracked.
    let mut written_entities = std::collections::HashSet::new();
    let mut all_outputs = Vec::new();
    let mut skipped = 0usize;
    for case_path in case_files {
        if !force_full
            && !collect_html
            && let Some(current_hash) = file_hashes.get(case_path)
            && build_cache.is_unchanged_with_hashes(case_path, current_hash, file_hashes)
        {
            skipped += 1;
            continue;
        }
        match build_case_output_tracked(case_path, reg, &mut written_entities, case_nulid_map) {
            Ok(case_output) => {
                let write_result =
                    write_case_output(case_path, &case_output.case_id, &case_output, output_dir);
                if write_result != 0 {
                    exit_code = write_result;
                }
                // Dependencies = registry files of every entity the
                // case references; a change to any of them invalidates
                // this case's cached build.
                let deps: Vec<String> = case_output
                    .nodes
                    .iter()
                    .filter_map(|n| {
                        reg.get_by_name(&n.name)
                            .and_then(|e| e.path.to_str().map(String::from))
                    })
                    .collect();
                if let Some(hash) = file_hashes.get(case_path) {
                    build_cache.put(case_path, hash.clone(), deps);
                }
                if collect_html {
                    all_outputs.push(case_output);
                }
            }
            Err(code) => {
                exit_code = code;
            }
        }
    }
    if skipped > 0 {
        eprintln!("incremental: {skipped} case(s) unchanged, skipped");
    }
    (exit_code, all_outputs)
}
/// Record non-case (registry) file hashes in the build cache, prune
/// entries for files that no longer exist, and persist to disk.
/// Save failures are warnings only.
fn finalize_build_cache(
    build_cache: &mut build_cache::BuildCache,
    case_files: &[String],
    file_hashes: &std::collections::HashMap<String, String>,
) {
    // Build the membership set once: the previous linear
    // `case_files.contains(...)` inside this loop was O(cases × files).
    let case_set: std::collections::HashSet<&str> =
        case_files.iter().map(String::as_str).collect();
    for (path_str, hash) in file_hashes {
        if !case_set.contains(path_str.as_str()) {
            // Registry/entity files carry no dependencies of their own.
            build_cache.put(path_str, hash.clone(), vec![]);
        }
    }
    // Drop cache entries for files that vanished since the last build.
    let all_files: std::collections::HashSet<String> = file_hashes.keys().cloned().collect();
    build_cache.prune(&all_files);
    if let Err(e) = build_cache.save() {
        eprintln!("warning: failed to save build cache: {e}");
    } else if !build_cache.is_empty() {
        eprintln!("build cache: {} entries saved", build_cache.len());
    }
}
/// Serialize one case to pretty JSON, writing it to
/// `<output_dir>/<case_id>.json` or to stdout when no output directory
/// was given. Returns 0 on success and 2 on serialization or write
/// failure.
fn write_case_output(
    path: &str,
    case_id: &str,
    case_output: &output::CaseOutput,
    output_dir: Option<&str>,
) -> i32 {
    // Serialize once up front; both destinations share the result.
    let json = match serde_json::to_string_pretty(case_output) {
        Ok(json) => json,
        Err(e) => {
            eprintln!("{path}: JSON serialization error: {e}");
            return 2;
        }
    };
    if let Some(dir) = output_dir {
        let out_path = format!("{dir}/{case_id}.json");
        if let Err(e) = std::fs::write(&out_path, json) {
            eprintln!("{out_path}: error writing file: {e}");
            return 2;
        }
        eprintln!("{path} -> {out_path}");
    } else {
        println!("{json}");
    }
    0
}
#[cfg(test)]
mod tests {
    use super::*;

    // Fixture exercising the full case format: frontmatter with id and
    // sources, title + summary, three events, explicit relationships
    // (one carrying its own source line) and a Timeline section.
    const FULL_CASE: &str = r"---
id: 01JABC000000000000000000AA
sources:
- https://www.theguardian.com/football/2025/feb/03/bonnick
- https://novaramedia.com/2025/02/04/bonnick
---
# Bonnick v Arsenal FC
Kit manager dismissed over social media posts about Israel-Gaza.
## Events
### Bonnick dismissal
- occurred_at: 2024-12-24
- event_type: dismissal
- description: Arsenal dismisses Bonnick over social media posts
regarding Israel-Gaza conflict.
### FA investigation finding
- occurred_at: 2024
- event_type: investigation_closed
- description: FA investigates and finds the posts did not breach
FA rules. Matter closed by FA.
### Employment tribunal filing
- occurred_at: 2025-02-03
- event_type: custom:Employment Tribunal
- description: Bonnick files employment tribunal claim against Arsenal.
## Relationships
- Bonnick dismissal -> FA investigation finding: preceded_by
- FA investigation finding -> Employment tribunal filing: preceded_by
- Bonnick dismissal -> Employment tribunal filing: references
source: https://novaramedia.com/2025/02/04/bonnick
## Timeline
- Bonnick dismissal -> FA investigation finding
- FA investigation finding -> Employment tribunal filing
";

    // End-to-end parse of FULL_CASE, checking every extracted piece.
    #[test]
    fn parse_full_case_file() {
        let (case, entities, rels) = parse_full(FULL_CASE, None).unwrap();
        assert_eq!(case.id.as_deref(), Some("01JABC000000000000000000AA"));
        assert_eq!(case.title, "Bonnick v Arsenal FC");
        assert!(case.summary.contains("Kit manager dismissed"));
        assert_eq!(case.sources.len(), 2);
        assert_eq!(entities.len(), 3);
        assert!(entities.iter().all(|e| e.label == entity::Label::Event));
        let dismissal = entities
            .iter()
            .find(|e| e.name == "Bonnick dismissal")
            .unwrap();
        assert_eq!(dismissal.label, entity::Label::Event);
        // 3 from ## Relationships + 2 from ## Timeline.
        assert_eq!(rels.len(), 5);
        let timeline_rels: Vec<_> = rels
            .iter()
            .filter(|r| r.rel_type == "preceded_by" && r.source_urls.is_empty())
            .collect();
        assert_eq!(timeline_rels.len(), 2);
        assert_eq!(timeline_rels[0].source_name, "Bonnick dismissal");
        assert_eq!(timeline_rels[0].target_name, "FA investigation finding");
        assert_eq!(timeline_rels[1].source_name, "FA investigation finding");
        assert_eq!(timeline_rels[1].target_name, "Employment tribunal filing");
    }

    // Minimal file: no id, one source, a single event, no relationships.
    #[test]
    fn parse_full_minimal_case() {
        let input = r"---
sources:
- https://example.com/source
---
# Minimal Test Case
A simple test.
## Events
### Something happened
- occurred_at: 2025-01-01
- event_type: conviction
";
        let (case, entities, rels) = parse_full(input, None).unwrap();
        assert!(case.id.is_none());
        assert_eq!(case.title, "Minimal Test Case");
        assert_eq!(entities.len(), 1);
        assert_eq!(entities[0].name, "Something happened");
        assert!(rels.is_empty());
    }

    // Serialises the built output and spot-checks the JSON shape plus
    // referential integrity of relationship source/target ids.
    #[test]
    fn json_snapshot_full_case() {
        let (case, entities, rels) = parse_full(FULL_CASE, None).unwrap();
        let build_result = output::build_output(
            "bonnick-v-arsenal",
            "01TEST00000000000000000000",
            &case.title,
            &case.summary,
            &case.tags,
            None,
            case.case_type.as_deref(),
            case.status.as_deref(),
            case.amounts.as_deref(),
            case.tagline.as_deref(),
            &case.sources,
            &case.related_cases,
            &std::collections::HashMap::new(),
            &entities,
            &rels,
            &[],
            &case.involved,
        )
        .unwrap();
        let json = serde_json::to_string_pretty(&build_result.output).unwrap();
        assert!(json.contains("\"case_id\": \"bonnick-v-arsenal\""));
        assert!(json.contains("\"title\": \"Bonnick v Arsenal FC\""));
        assert!(json.contains("\"label\": \"event\""));
        assert!(json.contains("\"name\": \"Bonnick dismissal\""));
        assert!(json.contains("\"name\": \"FA investigation finding\""));
        assert!(json.contains("\"event_type\": \"dismissal\""));
        assert!(json.contains("\"event_type\": \"investigation_closed\""));
        assert!(json.contains("\"type\": \"preceded_by\""));
        assert!(json.contains("\"type\": \"references\""));
        let output: serde_json::Value = serde_json::from_str(&json).unwrap();
        let nodes = output["nodes"].as_array().unwrap();
        let rels_arr = output["relationships"].as_array().unwrap();
        for node in nodes {
            let id = node["id"].as_str().unwrap();
            assert!(!id.is_empty());
            assert!(id.len() >= 20);
        }
        for rel in rels_arr {
            let id = rel["id"].as_str().unwrap();
            assert!(!id.is_empty());
        }
        // Every relationship endpoint must reference an emitted node.
        let node_ids: Vec<&str> = nodes.iter().map(|n| n["id"].as_str().unwrap()).collect();
        for rel in rels_arr {
            let source_id = rel["source_id"].as_str().unwrap();
            let target_id = rel["target_id"].as_str().unwrap();
            assert!(
                node_ids.contains(&source_id),
                "source_id {source_id} not found in nodes"
            );
            assert!(
                node_ids.contains(&target_id),
                "target_id {target_id} not found in nodes"
            );
        }
    }

    // Unset optional fields must be omitted from the JSON entirely,
    // not serialised as null or empty values.
    #[test]
    fn json_snapshot_omits_empty_fields() {
        let input = r"---
sources:
- https://example.com/src
---
# Sparse Case
Summary.
## Events
### Something
- occurred_at: 2025-01-01
";
        let (case, entities, rels) = parse_full(input, None).unwrap();
        let build_result = output::build_output(
            "sparse",
            "01TEST00000000000000000000",
            &case.title,
            &case.summary,
            &case.tags,
            None,
            case.case_type.as_deref(),
            case.status.as_deref(),
            case.amounts.as_deref(),
            case.tagline.as_deref(),
            &case.sources,
            &case.related_cases,
            &std::collections::HashMap::new(),
            &entities,
            &rels,
            &[],
            &case.involved,
        )
        .unwrap();
        let json = serde_json::to_string_pretty(&build_result.output).unwrap();
        assert!(!json.contains("\"qualifier\""));
        assert!(!json.contains("\"thumbnail\""));
        assert!(!json.contains("\"aliases\""));
        assert!(!json.contains("\"urls\""));
        assert!(json.contains("\"occurred_at\": \"2025-01-01\""));
    }

    // A name only known to the registry must fail to resolve without
    // the registry and succeed once it is supplied.
    #[test]
    fn cross_file_resolution_with_registry() {
        use std::path::PathBuf;
        use weave_content::entity::Entity;
        let entries = vec![registry::RegistryEntry {
            entity: Entity {
                name: "Mark Bonnick".to_string(),
                label: entity::Label::Person,
                fields: vec![(
                    "nationality".to_string(),
                    entity::FieldValue::Single("British".to_string()),
                )],
                id: Some("01JXYZ123456789ABCDEFGHIJK".to_string()),
                line: 1,
                tags: Vec::new(),
                slug: None,
            },
            path: PathBuf::from("people/mark-bonnick.md"),
            tags: Vec::new(),
        }];
        let reg = registry::EntityRegistry::from_entries(entries).unwrap();
        let input = r"---
sources:
- https://example.com/src
---
# Cross Reference Test
Summary.
## Events
### Dismissal
- occurred_at: 2024-12-24
- event_type: dismissal
## Relationships
- Mark Bonnick -> Dismissal: associate_of
";
        // Without a registry the unknown name is a parse error...
        let err = parse_full(input, None).unwrap_err();
        assert!(err.iter().any(|e| e.message.contains("Mark Bonnick")));
        // ...with the registry, the relationship resolves cross-file.
        let (case, entities, rels) = parse_full(input, Some(&reg)).unwrap();
        assert!(case.id.is_none());
        assert_eq!(entities.len(), 1);
        assert_eq!(rels.len(), 1);
        assert_eq!(rels[0].source_name, "Mark Bonnick");
        assert_eq!(rels[0].target_name, "Dismissal");
    }
}