use clap::{Parser, Subcommand};
use rayon::prelude::*;
use weave_content::build_cache;
use weave_content::cache;
use weave_content::output;
use weave_content::registry;
use weave_content::tags;
use weave_content::verifier;
use weave_content::{
build_case_output_tracked, load_registry, load_tag_registry, parse_full, resolve_case_files,
resolve_content_root,
};
#[cfg(test)]
use weave_content::entity;
// Command-line interface for weave-content, parsed via clap's derive API.
// NOTE(review): plain `//` comments are used (not `///`) because clap turns
// doc comments into help text; this documentation must not alter the help.
#[derive(Parser)]
#[command(name = "weave-content", version, about)]
struct Cli {
// Which subcommand to run (Validate, Verify, or Build).
#[command(subcommand)]
command: Command,
}
// Subcommands of the weave-content CLI.
// NOTE(review): `//` comments are used (not `///`) so clap's generated
// help output is left unchanged.
#[derive(Subcommand)]
enum Command {
// Parse case files and validate tags; prints diagnostics to stderr.
Validate {
// Optional path to a case file or directory; defaults per resolve_content_root.
path: Option<String>,
#[arg(long)]
// Optional explicit content root.
root: Option<String>,
},
// Check every URL referenced by cases and registry thumbnails.
Verify {
path: Option<String>,
#[arg(long)]
root: Option<String>,
// Maximum number of URL checks in flight at once.
#[arg(long, default_value_t = 16)]
concurrency: usize,
// Per-request timeout in seconds.
#[arg(long, default_value_t = 15)]
timeout: u64,
// Optional path to a verify-cache file to read/update.
#[arg(long)]
cache: Option<String>,
// Report URL errors without failing the process.
#[arg(long)]
warn_only: bool,
},
// Build JSON (and optionally HTML) outputs from case files.
Build {
path: Option<String>,
#[arg(long)]
root: Option<String>,
// Output directory; JSON goes to stdout when omitted.
#[arg(short, long)]
output: Option<String>,
// Also generate HTML pages and a sitemap (requires --output).
#[arg(long)]
html: bool,
// Base URL used for absolute links in the sitemap.
#[arg(long, default_value = "https://redberrythread.org")]
base_url: String,
// Ignore the incremental build cache and rebuild everything.
#[arg(long)]
full: bool,
},
}
/// Entry point: parse CLI arguments, dispatch to the matching subcommand
/// handler, and exit with the handler's numeric status code.
fn main() {
    let cli = Cli::parse();
    let status = match cli.command {
        Command::Validate { path, root } => cmd_validate(path.as_deref(), root.as_deref()),
        Command::Verify {
            path,
            root,
            concurrency,
            timeout,
            cache,
            warn_only,
        } => cmd_verify(
            path.as_deref(),
            root.as_deref(),
            concurrency,
            timeout,
            cache.as_deref(),
            warn_only,
        ),
        Command::Build {
            path,
            root,
            output,
            html,
            base_url,
            full,
        } => cmd_build(
            path.as_deref(),
            root.as_deref(),
            output.as_deref(),
            html,
            &base_url,
            full,
        ),
    };
    std::process::exit(status);
}
/// `validate` subcommand: loads the entity and tag registries, parses every
/// case file (in parallel), validates tags on both registry entities and
/// cases, then checks for duplicate event names and inconsistent qualifier
/// casing.
///
/// Returns a process exit code: 0 on success, 1 on validation errors,
/// 2 on read failures (propagated from individual cases).
///
/// Fix: restores `&reg` at the two call sites where the source had been
/// corrupted to `®` (HTML-entity mangling of `&reg`), which prevented
/// compilation.
fn cmd_validate(path: Option<&str>, root: Option<&str>) -> i32 {
    let content_root = resolve_content_root(path, root);
    let reg = match load_registry(&content_root) {
        Ok(r) => r,
        Err(code) => return code,
    };
    let tag_reg = match load_tag_registry(&content_root) {
        Ok(r) => r,
        Err(code) => return code,
    };
    let case_files = match resolve_case_files(path, &content_root) {
        Ok(f) => f,
        Err(code) => return code,
    };
    if case_files.is_empty() {
        eprintln!("no case files found");
        return 1;
    }
    if !reg.is_empty() {
        eprintln!("registry: {} entities loaded", reg.len());
    }
    if !tag_reg.is_empty() {
        eprintln!(
            "tags: {} tags loaded across {} categories",
            tag_reg.len(),
            tag_reg.category_slugs().len()
        );
    }
    // Validate the tags attached to registry entities (depth limit 2).
    let mut entity_tag_errors = false;
    for entry in reg.entries() {
        let tag_errors = tag_reg.validate_tags(&entry.tags, 2);
        for err in &tag_errors {
            eprintln!("{}:{err}", entry.path.display());
        }
        if !tag_errors.is_empty() {
            entity_tag_errors = true;
        }
    }
    // Validate case files in parallel; each result carries its own exit
    // code plus (event name, path) pairs used for duplicate detection.
    let results: Vec<ValidateResult> = case_files
        .par_iter()
        .map(|case_path| validate_single_case(case_path, &reg, &tag_reg))
        .collect();
    let mut exit_code = i32::from(entity_tag_errors);
    let mut all_events: Vec<(String, String)> = Vec::new();
    for result in results {
        if result.exit_code != 0 {
            exit_code = result.exit_code;
        }
        all_events.extend(result.events);
    }
    if let Some(code) = check_duplicate_event_names(&all_events) {
        exit_code = code;
    }
    // Qualifier-casing issues are warnings only; they never affect the code.
    check_qualifier_consistency(&reg);
    exit_code
}
// Outcome of validating one case file.
struct ValidateResult {
// 0 = ok, 1 = validation errors, 2 = read failure.
exit_code: i32,
// (event name, case file path) pairs, collected for duplicate detection.
events: Vec<(String, String)>,
}
fn validate_single_case(
path: &str,
reg: ®istry::EntityRegistry,
tag_reg: &tags::TagRegistry,
) -> ValidateResult {
let content = match std::fs::read_to_string(path) {
Ok(c) => c,
Err(e) => {
eprintln!("{path}: error reading file: {e}");
return ValidateResult {
exit_code: 2,
events: Vec::new(),
};
}
};
match parse_full(&content, Some(reg)) {
Ok((case, entities, rels)) => {
eprintln!(
"{path}: ok -- {id}: {title} ({ent} entities, {rel} relationships, {src} sources)",
id = case.id,
title = case.title,
ent = entities.len(),
rel = rels.len(),
src = case.sources.len(),
);
if !case.summary.is_empty() {
eprintln!(
" summary: {}...",
&case.summary[..case.summary.len().min(80)]
);
}
for e in &entities {
let id_display = e.id.as_deref().unwrap_or("(no id)");
eprintln!(
" line {}: {id_display} {} ({}, {} fields)",
e.line,
e.name,
e.label,
e.fields.len()
);
}
let events: Vec<(String, String)> = entities
.iter()
.filter(|e| e.label == weave_content::entity::Label::Event)
.map(|e| (e.name.clone(), path.to_string()))
.collect();
for r in &rels {
let id_display = r.id.as_deref().unwrap_or("(no id)");
eprintln!(
" line {}: {id_display} {} -> {}: {}",
r.line, r.source_name, r.target_name, r.rel_type,
);
}
let mut exit_code = 0;
let tag_errors = tag_reg.validate_tags(&case.tags, 2);
for err in &tag_errors {
eprintln!("{path}:{err}");
}
if !tag_errors.is_empty() {
exit_code = 1;
}
ValidateResult { exit_code, events }
}
Err(errors) => {
for err in &errors {
eprintln!("{path}:{err}");
}
ValidateResult {
exit_code: 1,
events: Vec::new(),
}
}
}
}
/// Scans `(event name, case path)` pairs for names that appear more than
/// once across all cases. Prints one error line per duplicate occurrence
/// (citing the file that first defined the name) and returns `Some(1)` if
/// any duplicate was found, `None` otherwise.
fn check_duplicate_event_names(all_events: &[(String, String)]) -> Option<i32> {
    use std::collections::hash_map::Entry;
    let mut first_seen: std::collections::HashMap<&str, &str> =
        std::collections::HashMap::new();
    let mut duplicate_found = false;
    for (name, path) in all_events {
        match first_seen.entry(name.as_str()) {
            Entry::Occupied(slot) => {
                // Keep the first definition; report this repeat against it.
                let first_path = slot.get();
                eprintln!(
                    "error: duplicate event name {name:?} in {path} (first defined in {first_path})"
                );
                duplicate_found = true;
            }
            Entry::Vacant(slot) => {
                slot.insert(path);
            }
        }
    }
    duplicate_found.then_some(1)
}
fn check_qualifier_consistency(reg: ®istry::EntityRegistry) {
use weave_content::entity::FieldValue;
let mut by_lower: std::collections::HashMap<String, Vec<(String, String)>> =
std::collections::HashMap::new();
for entry in reg.entries() {
let qualifier = entry
.entity
.fields
.iter()
.find(|(k, _)| k == "qualifier")
.and_then(|(_, v)| match v {
FieldValue::Single(s) => Some(s.as_str()),
FieldValue::List(_) => None,
});
if let Some(q) = qualifier {
by_lower
.entry(q.to_lowercase())
.or_default()
.push((q.to_string(), entry.path.display().to_string()));
}
}
for occurrences in by_lower.values() {
let first = &occurrences[0].0;
let inconsistent: Vec<_> = occurrences.iter().filter(|(q, _)| q != first).collect();
if !inconsistent.is_empty() {
eprintln!(
"warning: inconsistent qualifier casing for {:?}:",
occurrences[0].0
);
for (q, path) in occurrences {
eprintln!(" {path}: {q:?}");
}
}
}
}
/// `verify` subcommand: checks every URL referenced by the case files, then
/// the registry thumbnail URLs, optionally consulting/updating a verify
/// cache.
///
/// Returns the worst exit code observed (0 ok, 1 URL errors found,
/// 2 read/runtime failure). With `warn_only`, URL errors are reported but
/// do not fail the run.
///
/// Fix: restores `&reg` at the two call sites where the source had been
/// corrupted to `®` (HTML-entity mangling), which prevented compilation.
#[allow(clippy::too_many_lines)]
fn cmd_verify(
    path: Option<&str>,
    root: Option<&str>,
    concurrency: usize,
    timeout: u64,
    cache_path: Option<&str>,
    warn_only: bool,
) -> i32 {
    let content_root = resolve_content_root(path, root);
    let reg = match load_registry(&content_root) {
        Ok(r) => r,
        Err(code) => return code,
    };
    let case_files = match resolve_case_files(path, &content_root) {
        Ok(f) => f,
        Err(code) => return code,
    };
    if case_files.is_empty() {
        eprintln!("no case files found");
        return 1;
    }
    let mut exit_code = 0;
    for case_path in &case_files {
        let result =
            verify_single_case(case_path, &reg, concurrency, timeout, cache_path, warn_only);
        if result != 0 {
            exit_code = result;
        }
    }
    let reg_result = verify_registry_thumbnails(&reg, concurrency, timeout, cache_path, warn_only);
    if reg_result != 0 {
        exit_code = reg_result;
    }
    exit_code
}
#[allow(clippy::too_many_lines)]
fn verify_single_case(
path: &str,
reg: ®istry::EntityRegistry,
concurrency: usize,
timeout: u64,
cache_path: Option<&str>,
warn_only: bool,
) -> i32 {
let content = match std::fs::read_to_string(path) {
Ok(c) => c,
Err(e) => {
eprintln!("{path}: error reading file: {e}");
return 2;
}
};
let (case, entities, rels) = match parse_full(&content, Some(reg)) {
Ok(result) => result,
Err(errors) => {
for err in &errors {
eprintln!("{path}:{err}");
}
return 1;
}
};
let mut collect_errors = Vec::new();
let urls = verifier::collect_urls(&case.sources, &entities, &rels, &mut collect_errors);
if !collect_errors.is_empty() {
for err in &collect_errors {
eprintln!("{path}:{err}");
}
return 1;
}
if urls.is_empty() {
eprintln!("{path}: no URLs to verify");
return 0;
}
let mut verify_cache = cache_path.map(|p| match cache::VerifyCache::load(p) {
Ok(c) => {
eprintln!("{path}: using cache {p}");
c
}
Err(e) => {
eprintln!("{path}: cache load warning: {e}");
cache::VerifyCache::load("/dev/null").unwrap_or_else(|_| {
cache::VerifyCache::empty()
})
}
});
let (cached_results, urls_to_check) = partition_cached(&urls, verify_cache.as_ref());
let check_count = urls_to_check.len();
let cached_count = cached_results.len();
if cached_count > 0 {
eprintln!(
"{path}: {cached_count} cached, {check_count} to check (concurrency={concurrency}, timeout={timeout}s)"
);
} else {
eprintln!(
"{path}: verifying {check_count} URLs (concurrency={concurrency}, timeout={timeout}s)"
);
}
let fresh_results = if urls_to_check.is_empty() {
Vec::new()
} else {
let rt = match tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()
{
Ok(rt) => rt,
Err(e) => {
eprintln!("{path}: failed to create async runtime: {e}");
return 2;
}
};
rt.block_on(verifier::verify_urls(urls_to_check, concurrency, timeout))
};
if let Some(ref mut vc) = verify_cache {
for check in &fresh_results {
vc.put(&check.url, check.status, check.detail.as_deref());
}
}
let mut all_results = cached_results;
all_results.extend(fresh_results);
let mut has_error = false;
for check in &all_results {
let detail = check.detail.as_deref().unwrap_or("");
match check.status {
verifier::CheckStatus::Ok => {
eprintln!(
" ok {}{}",
check.url,
if check.is_thumbnail {
" [thumbnail]"
} else {
""
}
);
}
verifier::CheckStatus::Warn => {
eprintln!(" warn {} -- {detail}", check.url);
}
verifier::CheckStatus::Error => {
has_error = true;
eprintln!(" ERROR {} -- {detail}", check.url);
}
}
}
let ok_count = all_results
.iter()
.filter(|c| c.status == verifier::CheckStatus::Ok)
.count();
let warn_count = all_results
.iter()
.filter(|c| c.status == verifier::CheckStatus::Warn)
.count();
let err_count = all_results
.iter()
.filter(|c| c.status == verifier::CheckStatus::Error)
.count();
eprintln!("{path}: {ok_count} ok, {warn_count} warn, {err_count} error");
if let Some(ref vc) = verify_cache
&& let Err(e) = vc.save()
{
eprintln!("{path}: cache save warning: {e}");
}
i32::from(has_error && !warn_only)
}
fn verify_registry_thumbnails(
reg: ®istry::EntityRegistry,
concurrency: usize,
timeout: u64,
cache_path: Option<&str>,
warn_only: bool,
) -> i32 {
let urls = verifier::collect_registry_urls(reg);
if urls.is_empty() {
return 0;
}
let label = "(registry)";
let mut verify_cache = cache_path.map(|p| match cache::VerifyCache::load(p) {
Ok(c) => c,
Err(e) => {
eprintln!("{label}: cache load warning: {e}");
cache::VerifyCache::load("/dev/null").unwrap_or_else(|_| cache::VerifyCache::empty())
}
});
let (cached_results, urls_to_check) = partition_cached(&urls, verify_cache.as_ref());
let check_count = urls_to_check.len();
let cached_count = cached_results.len();
if cached_count > 0 {
eprintln!(
"{label}: {cached_count} cached, {check_count} to check (concurrency={concurrency}, timeout={timeout}s)"
);
} else {
eprintln!(
"{label}: verifying {check_count} thumbnail URLs (concurrency={concurrency}, timeout={timeout}s)"
);
}
let fresh_results = if urls_to_check.is_empty() {
Vec::new()
} else {
let rt = match tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()
{
Ok(rt) => rt,
Err(e) => {
eprintln!("{label}: failed to create async runtime: {e}");
return 2;
}
};
rt.block_on(verifier::verify_urls(urls_to_check, concurrency, timeout))
};
if let Some(ref mut vc) = verify_cache {
for check in &fresh_results {
vc.put(&check.url, check.status, check.detail.as_deref());
}
}
let mut all_results = cached_results;
all_results.extend(fresh_results);
let mut has_error = false;
for check in &all_results {
let detail = check.detail.as_deref().unwrap_or("");
match check.status {
verifier::CheckStatus::Ok => {
eprintln!(" ok {} [thumbnail]", check.url);
}
verifier::CheckStatus::Warn => {
eprintln!(" warn {} -- {detail}", check.url);
}
verifier::CheckStatus::Error => {
has_error = true;
eprintln!(" ERROR {} -- {detail}", check.url);
}
}
}
let ok_count = all_results
.iter()
.filter(|c| c.status == verifier::CheckStatus::Ok)
.count();
let warn_count = all_results
.iter()
.filter(|c| c.status == verifier::CheckStatus::Warn)
.count();
let err_count = all_results
.iter()
.filter(|c| c.status == verifier::CheckStatus::Error)
.count();
eprintln!("{label}: {ok_count} ok, {warn_count} warn, {err_count} error");
if let Some(ref vc) = verify_cache
&& let Err(e) = vc.save()
{
eprintln!("{label}: cache save warning: {e}");
}
i32::from(has_error && !warn_only)
}
fn partition_cached(
urls: &[verifier::UrlEntry],
verify_cache: Option<&cache::VerifyCache>,
) -> (Vec<verifier::UrlCheck>, Vec<verifier::UrlEntry>) {
let Some(vc) = verify_cache else {
return (Vec::new(), urls.to_vec());
};
let mut cached = Vec::new();
let mut uncached = Vec::new();
for entry in urls {
if let Some(cache_entry) = vc.get(entry.url()) {
let status = match cache_entry.status.as_str() {
"ok" => verifier::CheckStatus::Ok,
"warn" => verifier::CheckStatus::Warn,
_ => verifier::CheckStatus::Error,
};
cached.push(verifier::UrlCheck {
url: entry.url().to_string(),
status,
detail: cache_entry.detail.clone(),
is_thumbnail: entry.is_thumbnail(),
});
} else {
uncached.push(entry.clone());
}
}
(cached, uncached)
}
/// `build` subcommand: compiles case files into JSON outputs (and
/// optionally HTML pages + sitemap), using a content-hash build cache to
/// skip unchanged cases on incremental runs.
///
/// Returns a process exit code: 0 on success, otherwise the last non-zero
/// code from a failed case build/write or HTML generation.
///
/// Fix: restores `&reg` at the `build_case_output_tracked` call site where
/// the source had been corrupted to `®` (HTML-entity mangling), which
/// prevented compilation.
fn cmd_build(
    path: Option<&str>,
    root: Option<&str>,
    output_dir: Option<&str>,
    generate_html: bool,
    base_url: &str,
    force_full: bool,
) -> i32 {
    let content_root = resolve_content_root(path, root);
    let reg = match load_registry(&content_root) {
        Ok(r) => r,
        Err(code) => return code,
    };
    let case_files = match resolve_case_files(path, &content_root) {
        Ok(f) => f,
        Err(code) => return code,
    };
    if case_files.is_empty() {
        eprintln!("no case files found");
        return 1;
    }
    // `--full` ignores the saved cache; a load failure degrades to a fresh
    // (empty) cache rather than aborting the build.
    let mut build_cache = if force_full {
        build_cache::BuildCache::empty()
    } else {
        match build_cache::BuildCache::load(&content_root) {
            Ok(c) => c,
            Err(e) => {
                eprintln!("warning: {e}, starting fresh");
                build_cache::BuildCache::empty()
            }
        }
    };
    // Hash every registry entity file and case file up front; the hashes
    // drive both change detection and cache updates.
    let mut file_hashes: std::collections::HashMap<String, String> =
        std::collections::HashMap::new();
    for entry in reg.entries() {
        if let Some(path_str) = entry.path.to_str() {
            if let Ok(hash) = build_cache::hash_file(&entry.path) {
                file_hashes.insert(path_str.to_string(), hash);
            }
        }
    }
    for case_path in &case_files {
        if let Ok(hash) = build_cache::hash_file(std::path::Path::new(case_path)) {
            file_hashes.insert(case_path.clone(), hash);
        }
    }
    let mut exit_code = 0;
    let mut written_entities = std::collections::HashSet::new();
    let mut all_outputs: Vec<output::CaseOutput> = Vec::new();
    let mut skipped = 0usize;
    for case_path in &case_files {
        // Incremental skip only applies to JSON-only builds: HTML output
        // aggregates cross-case data, so it always needs every case.
        if !force_full && !generate_html {
            if let Some(current_hash) = file_hashes.get(case_path)
                && build_cache.is_unchanged_with_hashes(case_path, current_hash, &file_hashes)
            {
                skipped += 1;
                continue;
            }
        }
        match build_case_output_tracked(case_path, &reg, &mut written_entities) {
            Ok(case_output) => {
                let write_result =
                    write_case_output(case_path, &case_output.case_id, &case_output, output_dir);
                if write_result != 0 {
                    exit_code = write_result;
                }
                // A case depends on the registry file of every entity it
                // references; record them so registry edits invalidate it.
                let deps: Vec<String> = case_output
                    .nodes
                    .iter()
                    .filter_map(|n| {
                        let name = &n.name;
                        reg.get_by_name(name)
                            .and_then(|e| e.path.to_str().map(String::from))
                    })
                    .collect();
                if let Some(hash) = file_hashes.get(case_path) {
                    build_cache.put(case_path, hash.clone(), deps);
                }
                if generate_html {
                    all_outputs.push(case_output);
                }
            }
            Err(code) => {
                exit_code = code;
            }
        }
    }
    if skipped > 0 {
        eprintln!("incremental: {skipped} case(s) unchanged, skipped");
    }
    // Track non-case (registry) files in the cache too, with no deps, so
    // stale entries can be pruned consistently.
    for (path_str, hash) in &file_hashes {
        if !case_files.contains(path_str) {
            build_cache.put(path_str, hash.clone(), vec![]);
        }
    }
    let all_files: std::collections::HashSet<String> = file_hashes.keys().cloned().collect();
    build_cache.prune(&all_files);
    if let Err(e) = build_cache.save() {
        eprintln!("warning: failed to save build cache: {e}");
    } else if !build_cache.is_empty() {
        eprintln!("build cache: {} entries saved", build_cache.len());
    }
    if generate_html {
        if let Some(dir) = output_dir {
            let html_result = generate_html_output(dir, &all_outputs, base_url);
            if html_result != 0 {
                exit_code = html_result;
            }
        } else {
            eprintln!("--html requires --output directory");
            exit_code = 1;
        }
    }
    exit_code
}
/// Renders HTML fragments for each case, each person, and each organization
/// under `<output_dir>/html/{cases,people,organizations}/`, then writes a
/// sitemap.xml covering all of them.
///
/// Returns 0 on success, 2 on any directory/render/write failure.
///
/// Fix: the people/org sitemap entries were collected by iterating
/// `HashMap`s, so sitemap.xml ordering was nondeterministic across runs;
/// the entry lists are now sorted for reproducible output.
#[allow(clippy::too_many_lines)]
fn generate_html_output(output_dir: &str, cases: &[output::CaseOutput], base_url: &str) -> i32 {
    use weave_content::html;
    let html_dir = format!("{output_dir}/html");
    let cases_dir = format!("{html_dir}/cases");
    let people_dir = format!("{html_dir}/people");
    let orgs_dir = format!("{html_dir}/organizations");
    for dir in [&cases_dir, &people_dir, &orgs_dir] {
        if let Err(e) = std::fs::create_dir_all(dir) {
            eprintln!("error creating directory {dir}: {e}");
            return 2;
        }
    }
    // Per-entity case membership (id -> list of (case_id, title)) and a
    // deduplicated node per person/organization id (first occurrence wins).
    let mut person_cases: std::collections::HashMap<String, Vec<(String, String)>> =
        std::collections::HashMap::new();
    let mut org_cases: std::collections::HashMap<String, Vec<(String, String)>> =
        std::collections::HashMap::new();
    let mut all_people: std::collections::HashMap<String, &output::NodeOutput> =
        std::collections::HashMap::new();
    let mut all_orgs: std::collections::HashMap<String, &output::NodeOutput> =
        std::collections::HashMap::new();
    for case in cases {
        match html::render_case(case) {
            Ok(fragment) => {
                let path = format!("{cases_dir}/{}.html", case.case_id);
                if let Err(e) = std::fs::write(&path, &fragment) {
                    eprintln!("error writing {path}: {e}");
                    return 2;
                }
                eprintln!("html: {path}");
            }
            Err(e) => {
                eprintln!("error rendering case {}: {e}", case.case_id);
                return 2;
            }
        }
        for node in &case.nodes {
            match node.label.as_str() {
                "person" => {
                    person_cases
                        .entry(node.id.clone())
                        .or_default()
                        .push((case.case_id.clone(), case.title.clone()));
                    all_people.entry(node.id.clone()).or_insert(node);
                }
                "organization" => {
                    org_cases
                        .entry(node.id.clone())
                        .or_default()
                        .push((case.case_id.clone(), case.title.clone()));
                    all_orgs.entry(node.id.clone()).or_insert(node);
                }
                _ => {}
            }
        }
    }
    for (id, node) in &all_people {
        let case_list = person_cases.get(id).cloned().unwrap_or_default();
        match html::render_person(node, &case_list) {
            Ok(fragment) => {
                let path = format!("{people_dir}/{id}.html");
                if let Err(e) = std::fs::write(&path, &fragment) {
                    eprintln!("error writing {path}: {e}");
                    return 2;
                }
            }
            Err(e) => {
                eprintln!("error rendering person {id}: {e}");
                return 2;
            }
        }
    }
    eprintln!("html: {} person pages", all_people.len());
    for (id, node) in &all_orgs {
        let case_list = org_cases.get(id).cloned().unwrap_or_default();
        match html::render_organization(node, &case_list) {
            Ok(fragment) => {
                let path = format!("{orgs_dir}/{id}.html");
                if let Err(e) = std::fs::write(&path, &fragment) {
                    eprintln!("error writing {path}: {e}");
                    return 2;
                }
            }
            Err(e) => {
                eprintln!("error rendering organization {id}: {e}");
                return 2;
            }
        }
    }
    eprintln!("html: {} organization pages", all_orgs.len());
    // Case order follows the input slice; people/org entries come from
    // HashMaps, so sort them for a deterministic sitemap.
    let case_entries: Vec<(String, String)> = cases
        .iter()
        .map(|c| (c.case_id.clone(), c.title.clone()))
        .collect();
    let mut people_entries: Vec<(String, String)> = all_people
        .iter()
        .map(|(id, n)| (id.clone(), n.name.clone()))
        .collect();
    people_entries.sort();
    let mut org_entries: Vec<(String, String)> = all_orgs
        .iter()
        .map(|(id, n)| (id.clone(), n.name.clone()))
        .collect();
    org_entries.sort();
    let sitemap = html::render_sitemap(&case_entries, &people_entries, &org_entries, base_url);
    let sitemap_path = format!("{html_dir}/sitemap.xml");
    if let Err(e) = std::fs::write(&sitemap_path, &sitemap) {
        eprintln!("error writing {sitemap_path}: {e}");
        return 2;
    }
    eprintln!("html: {sitemap_path}");
    0
}
/// Serializes one case's build output as pretty JSON, either into
/// `<output_dir>/<case_id>.json` or to stdout when no directory was given.
/// Returns 0 on success, 2 on serialization or write failure.
fn write_case_output(
    path: &str,
    case_id: &str,
    case_output: &output::CaseOutput,
    output_dir: Option<&str>,
) -> i32 {
    // Serialize once up front; both destinations use the same JSON.
    let json = match serde_json::to_string_pretty(case_output) {
        Ok(json) => json,
        Err(e) => {
            eprintln!("{path}: JSON serialization error: {e}");
            return 2;
        }
    };
    if let Some(dir) = output_dir {
        let out_path = format!("{dir}/{case_id}.json");
        if let Err(e) = std::fs::write(&out_path, json) {
            eprintln!("{out_path}: error writing file: {e}");
            return 2;
        }
        eprintln!("{path} -> {out_path}");
    } else {
        println!("{json}");
    }
    0
}
#[cfg(test)]
mod tests {
    use super::*;

    // Fixture: a complete case file with frontmatter, events, explicit
    // relationships, and a timeline section.
    const FULL_CASE: &str = r"---
id: bonnick-v-arsenal
sources:
- https://www.theguardian.com/football/2025/feb/03/bonnick
- https://novaramedia.com/2025/02/04/bonnick
---
# Bonnick v Arsenal FC
Kit manager dismissed over social media posts about Israel-Gaza.
## Events
### Bonnick dismissal
- occurred_at: 2024-12-24
- event_type: dismissal
- description: Arsenal dismisses Bonnick over social media posts
regarding Israel-Gaza conflict.
### FA investigation finding
- occurred_at: 2024
- event_type: investigation_closed
- description: FA investigates and finds the posts did not breach
FA rules. Matter closed by FA.
### Employment tribunal filing
- occurred_at: 2025-02-03
- event_type: custom:Employment Tribunal
- description: Bonnick files employment tribunal claim against Arsenal.
## Relationships
- Bonnick dismissal -> FA investigation finding: preceded_by
- FA investigation finding -> Employment tribunal filing: preceded_by
- Bonnick dismissal -> Employment tribunal filing: references
- source: https://novaramedia.com/2025/02/04/bonnick
## Timeline
Bonnick dismissal -> FA investigation finding -> Employment tribunal filing
";

    #[test]
    fn parse_full_case_file() {
        let (case, entities, rels) = parse_full(FULL_CASE, None).unwrap();
        assert_eq!(case.id, "bonnick-v-arsenal");
        assert_eq!(case.title, "Bonnick v Arsenal FC");
        assert!(case.summary.contains("Kit manager dismissed"));
        assert_eq!(case.sources.len(), 2);
        assert_eq!(entities.len(), 3);
        assert!(entities.iter().all(|e| e.label == entity::Label::Event));
        let dismissal = entities
            .iter()
            .find(|e| e.name == "Bonnick dismissal")
            .unwrap();
        assert_eq!(dismissal.label, entity::Label::Event);
        assert_eq!(rels.len(), 5);
        // Timeline-derived relationships have no attached source URLs.
        let timeline_rels: Vec<_> = rels
            .iter()
            .filter(|r| r.rel_type == "preceded_by" && r.source_urls.is_empty())
            .collect();
        assert_eq!(timeline_rels.len(), 2);
        assert_eq!(timeline_rels[0].source_name, "Bonnick dismissal");
        assert_eq!(timeline_rels[0].target_name, "FA investigation finding");
        assert_eq!(timeline_rels[1].source_name, "FA investigation finding");
        assert_eq!(timeline_rels[1].target_name, "Employment tribunal filing");
    }

    #[test]
    fn parse_full_minimal_case() {
        let input = r"---
id: minimal-test
sources:
- https://example.com/source
---
# Minimal Test Case
A simple test.
## Events
### Something happened
- occurred_at: 2025-01-01
- event_type: conviction
";
        let (case, entities, rels) = parse_full(input, None).unwrap();
        assert_eq!(case.id, "minimal-test");
        assert_eq!(case.title, "Minimal Test Case");
        assert_eq!(entities.len(), 1);
        assert_eq!(entities[0].name, "Something happened");
        assert!(rels.is_empty());
    }

    #[test]
    fn json_snapshot_full_case() {
        let (case, entities, rels) = parse_full(FULL_CASE, None).unwrap();
        let build_result = output::build_output(
            &case.id,
            &case.title,
            &case.summary,
            &case.tags,
            &case.sources,
            &entities,
            &rels,
            &[],
        )
        .unwrap();
        let json = serde_json::to_string_pretty(&build_result.output).unwrap();
        assert!(json.contains("\"case_id\": \"bonnick-v-arsenal\""));
        assert!(json.contains("\"title\": \"Bonnick v Arsenal FC\""));
        assert!(json.contains("\"label\": \"event\""));
        assert!(json.contains("\"name\": \"Bonnick dismissal\""));
        assert!(json.contains("\"name\": \"FA investigation finding\""));
        assert!(json.contains("\"event_type\": \"dismissal\""));
        assert!(json.contains("\"event_type\": \"investigation_closed\""));
        assert!(json.contains("\"type\": \"preceded_by\""));
        assert!(json.contains("\"type\": \"references\""));
        let output: serde_json::Value = serde_json::from_str(&json).unwrap();
        let nodes = output["nodes"].as_array().unwrap();
        let rels_arr = output["relationships"].as_array().unwrap();
        for node in nodes {
            let id = node["id"].as_str().unwrap();
            assert!(!id.is_empty());
            assert!(id.len() >= 20);
        }
        for rel in rels_arr {
            let id = rel["id"].as_str().unwrap();
            assert!(!id.is_empty());
        }
        // Every relationship endpoint must resolve to an emitted node.
        let node_ids: Vec<&str> = nodes.iter().map(|n| n["id"].as_str().unwrap()).collect();
        for rel in rels_arr {
            let source_id = rel["source_id"].as_str().unwrap();
            let target_id = rel["target_id"].as_str().unwrap();
            assert!(
                node_ids.contains(&source_id),
                "source_id {source_id} not found in nodes"
            );
            assert!(
                node_ids.contains(&target_id),
                "target_id {target_id} not found in nodes"
            );
        }
    }

    #[test]
    fn json_snapshot_omits_empty_fields() {
        let input = r"---
id: sparse
sources:
- https://example.com/src
---
# Sparse Case
Summary.
## Events
### Something
- occurred_at: 2025-01-01
";
        let (case, entities, rels) = parse_full(input, None).unwrap();
        let build_result = output::build_output(
            &case.id,
            &case.title,
            &case.summary,
            &case.tags,
            &case.sources,
            &entities,
            &rels,
            &[],
        )
        .unwrap();
        let json = serde_json::to_string_pretty(&build_result.output).unwrap();
        assert!(!json.contains("\"qualifier\""));
        assert!(!json.contains("\"description\""));
        assert!(!json.contains("\"thumbnail\""));
        assert!(!json.contains("\"aliases\""));
        assert!(!json.contains("\"urls\""));
        assert!(json.contains("\"occurred_at\": \"2025-01-01\""));
    }

    #[test]
    fn cross_file_resolution_with_registry() {
        use std::path::PathBuf;
        use weave_content::entity::Entity;
        let entries = vec![registry::RegistryEntry {
            entity: Entity {
                name: "Mark Bonnick".to_string(),
                label: entity::Label::Person,
                fields: vec![(
                    "nationality".to_string(),
                    entity::FieldValue::Single("British".to_string()),
                )],
                id: Some("01JXYZ123456789ABCDEFGHIJK".to_string()),
                line: 1,
                tags: Vec::new(),
            },
            path: PathBuf::from("people/mark-bonnick.md"),
            tags: Vec::new(),
        }];
        let reg = registry::EntityRegistry::from_entries(entries).unwrap();
        let input = r"---
id: test-cross-ref
sources:
- https://example.com/src
---
# Cross Reference Test
Summary.
## Events
### Dismissal
- occurred_at: 2024-12-24
- event_type: dismissal
## Relationships
- Mark Bonnick -> Dismissal: associate_of
";
        // Without a registry the cross-file name must fail to resolve.
        let err = parse_full(input, None).unwrap_err();
        assert!(err.iter().any(|e| e.message.contains("Mark Bonnick")));
        // With the registry it resolves. (Fix: `Some(&reg)` had been
        // corrupted to `Some(®)` by HTML-entity mangling.)
        let (case, entities, rels) = parse_full(input, Some(&reg)).unwrap();
        assert_eq!(case.id, "test-cross-ref");
        assert_eq!(entities.len(), 1);
        assert_eq!(rels.len(), 1);
        assert_eq!(rels[0].source_name, "Mark Bonnick");
        assert_eq!(rels[0].target_name, "Dismissal");
    }
}