weave-content 0.2.17

#![allow(clippy::module_name_repetitions)]

use std::fmt;

use serde::{Deserialize, Serialize};

/// Exact length a case NULID must have (26 chars Crockford Base32).
///
/// Despite the `MAX_` prefix, front matter validation rejects any `id` whose
/// length differs from this value, not only longer ones.
const MAX_CASE_ID_LEN: usize = 26;

/// Maximum number of sources in front matter.
const MAX_SOURCES: usize = 20;

/// Maximum length of the case title (H1); also applied to the H1 name of
/// standalone entity files. Compared against `str::len`, i.e. UTF-8 bytes.
const MAX_TITLE_LEN: usize = 200;

/// Maximum length of the case summary, compared against `str::len` (UTF-8 bytes).
const MAX_SUMMARY_LEN: usize = 2000;

/// H2 section names accepted in case files, as listed in the
/// "unknown section" error message.
///
/// This must stay in sync with `SectionKind::is_case_section`: every heading
/// that function accepts (including `Involved`) is listed here so the error
/// message does not under-report the valid choices.
/// People and Organizations are no longer allowed in case files -- they
/// live in standalone entity files under `people/` and `organizations/`.
const KNOWN_CASE_SECTIONS: &[&str] = &[
    "Events",
    "Documents",
    "Assets",
    "Relationships",
    "Timeline",
    "Related Cases",
    "Involved",
];

/// A parsed case file with front matter, title, summary, and raw sections.
#[derive(Debug)]
pub struct ParsedCase {
    /// NULID for the case node (None if not yet generated).
    pub id: Option<String>,
    /// Source entries from front matter (bare URLs or structured objects).
    pub sources: Vec<SourceEntry>,
    /// Case title taken from the H1 heading.
    pub title: String,
    /// Summary text that follows the H1 heading, with surrounding whitespace trimmed.
    pub summary: String,
    /// Remaining raw H2 sections. `Related Cases` and `Involved` sections are
    /// parsed into `related_cases` / `involved` and removed from this list.
    pub sections: Vec<Section>,
    /// Case type from front matter (e.g. `corruption`, `fraud`).
    pub case_type: Option<String>,
    /// Case status from front matter (e.g. `open`, `trial`).
    pub status: Option<String>,
    /// Structured amounts DSL string (e.g. `660000 USD bribe | 250000000 IDR fine`).
    pub amounts: Option<String>,
    /// Tags from front matter for categorization.
    pub tags: Vec<String>,
    /// Related case entries from `## Related Cases` section.
    pub related_cases: Vec<RelatedCase>,
    /// Involved entity entries from `## Involved` section.
    pub involved: Vec<InvolvedEntry>,
}

/// A related case entry from `## Related Cases` section.
///
/// Serialized with serde; `id` is omitted when absent and `line` is never
/// serialized.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct RelatedCase {
    /// Case path relative to content root (e.g. `id/corruption/2002/blbi-liquidity-aid-scandal`).
    pub case_path: String,
    /// Description of the relationship between the cases.
    /// The section parser only emits entries whose description is non-empty.
    pub description: String,
    /// NULID for the `related_to` relationship (auto-generated on first build).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
    /// Line number (1-indexed) where this entry appears in the original file.
    #[serde(skip)]
    pub line: usize,
}

/// An entity reference in the `## Involved` section.
///
/// Written in the file as a `- Entity Name` bullet, optionally followed by an
/// `id: <NULID>` line.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct InvolvedEntry {
    /// Entity name (must match a registry entity referenced in the case).
    pub entity_name: String,
    /// NULID for the `involved_in` relationship (auto-generated on first build).
    /// `None` when the bullet has no `id:` line directly after it.
    pub id: Option<String>,
    /// Line number (1-indexed) where this entry appears in the original file.
    pub line: usize,
}

/// A raw H2 section with its heading text and body content.
#[derive(Debug)]
pub struct Section {
    /// Which known section this is, derived from the H2 heading text.
    pub kind: SectionKind,
    /// Unparsed lines between this heading and the next H2 (or end of file),
    /// joined with `\n`.
    pub body: String,
    /// Line number (1-indexed) where the H2 heading appears in the original file.
    pub line: usize,
}

/// The type of an H2 section, mapped from heading text.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SectionKind {
    /// `## People` (legacy; rejected in case files).
    People,
    /// `## Organizations` (legacy; rejected in case files).
    Organizations,
    /// `## Events`
    Events,
    /// `## Documents`
    Documents,
    /// `## Assets`
    Assets,
    /// `## Relationships`
    Relationships,
    /// `## Timeline`
    Timeline,
    /// `## Related Cases`
    RelatedCases,
    /// `## Involved`
    Involved,
}

impl SectionKind {
    /// Heading text recognised for each section kind.
    const HEADINGS: [(&'static str, SectionKind); 9] = [
        ("People", SectionKind::People),
        ("Organizations", SectionKind::Organizations),
        ("Events", SectionKind::Events),
        ("Documents", SectionKind::Documents),
        ("Assets", SectionKind::Assets),
        ("Relationships", SectionKind::Relationships),
        ("Timeline", SectionKind::Timeline),
        ("Related Cases", SectionKind::RelatedCases),
        ("Involved", SectionKind::Involved),
    ];

    /// Map H2 heading text to a section kind.
    ///
    /// Surrounding whitespace is ignored and the comparison is ASCII
    /// case-insensitive. Returns `None` for any other heading.
    fn from_heading(heading: &str) -> Option<Self> {
        let wanted = heading.trim();
        Self::HEADINGS
            .iter()
            .find(|(label, _)| label.eq_ignore_ascii_case(wanted))
            .map(|&(_, kind)| kind)
    }

    /// Whether this section kind is valid in case files.
    /// People and Organizations are no longer allowed in case files.
    pub fn is_case_section(self) -> bool {
        // Exhaustive on purpose: a new variant must be classified explicitly.
        match self {
            Self::People | Self::Organizations => false,
            Self::Events
            | Self::Documents
            | Self::Assets
            | Self::Relationships
            | Self::Timeline
            | Self::RelatedCases
            | Self::Involved => true,
        }
    }
}

/// A parser diagnostic tied to a location in the source file.
#[derive(Debug)]
pub struct ParseError {
    /// Line number in the original file that the problem is reported against.
    pub line: usize,
    /// Human-readable description of the problem.
    pub message: String,
}

impl fmt::Display for ParseError {
    /// Renders the error as `line <N>: <message>`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { line, message } = self;
        write!(f, "line {line}: {message}")
    }
}

/// Maximum number of tags per case file.
const MAX_CASE_TAGS: usize = 10;

/// Maximum number of tags per entity file.
const MAX_ENTITY_TAGS: usize = 5;

/// Maximum length of a single tag (case and entity files alike), compared
/// against `str::len`, i.e. UTF-8 bytes.
const MAX_TAG_LEN: usize = 50;

/// Maximum number of related case entries per case file.
const MAX_RELATED_CASES: usize = 10;

/// Maximum length of a related case description.
const MAX_RELATED_DESCRIPTION_LEN: usize = 500;

/// Parse the body of a `## Related Cases` section into `RelatedCase` entries.
///
/// Each entry is an unindented bullet `- <case_path>` followed by fields
/// indented by exactly two spaces: `description: <text>` (required) and
/// `id: <NULID>` (optional, written back). A repeated field overwrites the
/// earlier value for the same entry.
///
/// `section_start_line` is the file line of the H2 heading; body line `n`
/// (0-based) is reported as `section_start_line + n + 1`.
///
/// Problems are appended to `errors` in this order: per-line problems found
/// while scanning, then the entry-count limit (reported at the heading line),
/// then per-entry validation. Entries failing validation are left out of the
/// returned list; exceeding the count limit does not truncate it.
pub fn parse_related_cases(
    body: &str,
    section_start_line: usize,
    errors: &mut Vec<ParseError>,
) -> Vec<RelatedCase> {
    // Pass 1: collect raw entries exactly as written, without validating them yet.
    let mut pending: Vec<RelatedCase> = Vec::new();

    for (file_line, raw) in (section_start_line + 1..).zip(body.lines()) {
        if let Some(path) = raw.strip_prefix("- ") {
            pending.push(RelatedCase {
                case_path: path.trim().to_string(),
                description: String::new(),
                id: None,
                line: file_line,
            });
        } else if let Some(text) = raw.strip_prefix("  description: ") {
            match pending.last_mut() {
                Some(current) => current.description = text.trim().to_string(),
                None => errors.push(ParseError {
                    line: file_line,
                    message: "description without a preceding case path".into(),
                }),
            }
        } else if let Some(text) = raw.strip_prefix("  id: ") {
            match pending.last_mut() {
                Some(current) => current.id = Some(text.trim().to_string()),
                None => errors.push(ParseError {
                    line: file_line,
                    message: "id without a preceding case path".into(),
                }),
            }
        } else if !raw.trim().is_empty() {
            errors.push(ParseError {
                line: file_line,
                message: format!("unexpected line in Related Cases: {raw}"),
            });
        }
    }

    // Pass 2: the size limit counts every bullet, valid or not.
    let total = pending.len();
    if total > MAX_RELATED_CASES {
        errors.push(ParseError {
            line: section_start_line,
            message: format!("Related Cases exceeds {MAX_RELATED_CASES} entries (got {total})"),
        });
    }

    // Pass 3: keep only entries that pass validation; report the first problem of each.
    pending
        .into_iter()
        .filter(|entry| {
            let problem = if entry.case_path.is_empty() {
                Some("related case path must not be empty".to_string())
            } else if entry.description.is_empty() {
                Some(format!(
                    "related case {:?} missing description",
                    entry.case_path
                ))
            } else if entry.description.len() > MAX_RELATED_DESCRIPTION_LEN {
                Some(format!(
                    "related case description exceeds {MAX_RELATED_DESCRIPTION_LEN} chars (got {})",
                    entry.description.len()
                ))
            } else {
                None
            };

            match problem {
                Some(message) => {
                    errors.push(ParseError {
                        line: entry.line,
                        message,
                    });
                    false
                }
                None => true,
            }
        })
        .collect()
}

/// Maximum number of entries in `## Involved` section.
const MAX_INVOLVED: usize = 50;

/// Parse the body of a `## Involved` section into `InvolvedEntry` items.
///
/// Format:
/// ```text
/// - Entity Name
///   id: 01ABC...
/// ```
///
/// Every line is trimmed before matching, so indentation is not significant.
/// Blank lines are skipped. An `id:` line is attached to an entry only when it
/// is the line directly after the bullet; any other non-bullet line is
/// reported as an error.
///
/// `section_start_line` is the file line of the H2 heading; body line `n`
/// (0-based) is reported as `section_start_line + 1 + n`. Exceeding
/// `MAX_INVOLVED` is reported at the heading line and does not truncate the
/// returned list.
pub fn parse_involved(
    body: &str,
    section_start_line: usize,
    errors: &mut Vec<ParseError>,
) -> Vec<InvolvedEntry> {
    let mut entries = Vec::new();

    // Pair every trimmed body line with its file line; peeking lets a bullet
    // claim the `id:` line that immediately follows it.
    let mut rows = (section_start_line + 1..)
        .zip(body.lines().map(str::trim))
        .peekable();

    while let Some((file_line, row)) = rows.next() {
        if row.is_empty() {
            continue;
        }

        let Some(name) = row.strip_prefix("- ") else {
            errors.push(ParseError {
                line: file_line,
                message: format!("expected involved entry `- Entity Name`, got {row:?}"),
            });
            continue;
        };

        let entity_name = name.trim().to_string();
        if entity_name.is_empty() {
            errors.push(ParseError {
                line: file_line,
                message: "involved entity name must not be empty".into(),
            });
            continue;
        }

        let id = rows
            .peek()
            .and_then(|(_, next)| next.strip_prefix("id: "))
            .map(|value| value.trim().to_string());
        if id.is_some() {
            // The peeked line belonged to this entry; consume it.
            rows.next();
        }

        entries.push(InvolvedEntry {
            entity_name,
            id,
            line: file_line,
        });
    }

    let total = entries.len();
    if total > MAX_INVOLVED {
        errors.push(ParseError {
            line: section_start_line,
            message: format!("Involved exceeds {MAX_INVOLVED} entries (got {total})"),
        });
    }

    entries
}

/// YAML front matter schema for case files.
///
/// Every field is `#[serde(default)]`, so a key missing from the YAML yields
/// `None` or an empty list rather than a deserialization error.
#[derive(Deserialize)]
struct FrontMatter {
    /// NULID for the case node (auto-generated on first build).
    #[serde(default)]
    id: Option<String>,
    /// Source entries (bare URL strings or structured objects).
    #[serde(default)]
    sources: Vec<SourceEntry>,
    /// Case type as written in the file; checked in `validate_front_matter`.
    #[serde(default)]
    case_type: Option<String>,
    /// Case status as written in the file; checked in `validate_front_matter`.
    #[serde(default)]
    status: Option<String>,
    /// Amounts DSL string, carried through without validation here.
    #[serde(default)]
    amounts: Option<String>,
    /// Free-form tags; count and length are checked in `validate_front_matter`.
    #[serde(default)]
    tags: Vec<String>,
}

/// A source entry in front matter. Supports both bare URL strings and
/// structured objects with metadata.
///
/// The enum is `#[serde(untagged)]`: the representation carries no variant
/// tag, so a plain string is read as `Url` and a mapping with a `url` key as
/// `Structured`.
#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
#[serde(untagged)]
pub enum SourceEntry {
    /// Plain URL string (backward-compatible).
    Url(String),
    /// Structured source with metadata.
    Structured {
        /// Source URL (the only required key).
        url: String,
        /// Optional title of the source.
        #[serde(default)]
        title: Option<String>,
        /// Optional publication date, kept as the raw string from the file.
        #[serde(default)]
        published_at: Option<String>,
        /// Optional language of the source, kept as the raw string from the file.
        #[serde(default)]
        language: Option<String>,
    },
}

impl SourceEntry {
    /// Borrow the URL carried by this entry, whichever variant it is.
    pub fn url(&self) -> &str {
        // Both variants bind a `String` URL, so one irrefutable or-pattern covers them.
        let (Self::Url(url) | Self::Structured { url, .. }) = self;
        url
    }
}

/// YAML front matter schema for standalone entity files.
/// Contains an optional `id` field (NULID, generated on first build) and an
/// optional list of `tags`; both default when the key is absent.
#[derive(Deserialize)]
struct EntityFrontMatter {
    /// Stored NULID for the entity, if one has been written to the file.
    #[serde(default)]
    id: Option<String>,
    /// Tags for the entity; count and length are checked in `parse_entity_file`.
    #[serde(default)]
    tags: Vec<String>,
}

/// A parsed standalone entity file (actor or institution).
#[derive(Debug)]
pub struct ParsedEntityFile {
    /// Stored NULID from front matter (None if not yet generated).
    pub id: Option<String>,
    /// Entity name from H1 heading.
    pub name: String,
    /// Raw bullet field lines (body after H1, no sections), joined with `\n`.
    pub body: String,
    /// Line number of the H1 heading in the original file.
    pub title_line: usize,
    /// Tags from front matter (empty when the file has no front matter).
    pub tags: Vec<String>,
}

/// Parse a Markdown case file into a `ParsedCase`.
///
/// Extracts YAML front matter, H1 title, summary, and H2 sections.
/// Returns errors for malformed structure or boundary violations.
pub fn parse(input: &str) -> Result<ParsedCase, Vec<ParseError>> {
    let mut errors = Vec::new();

    // Extract front matter
    let (front_matter, body_start_line, body) = extract_front_matter(input, &mut errors);

    let Some(front_matter) = front_matter else {
        if errors.is_empty() {
            errors.push(ParseError {
                line: 1,
                message: "missing YAML front matter (expected `---` delimiter)".into(),
            });
        }
        return Err(errors);
    };

    // Validate front matter fields
    validate_front_matter(&front_matter, &mut errors);

    // Extract title, summary, and sections from body
    let (title, summary, mut sections) = extract_body(&body, body_start_line, &mut errors);

    // Parse Related Cases sections
    let mut related_cases = Vec::new();
    for section in &sections {
        if section.kind == SectionKind::RelatedCases {
            let entries = parse_related_cases(&section.body, section.line, &mut errors);
            related_cases.extend(entries);
        }
    }
    // Remove RelatedCases from sections list (consumed)
    sections.retain(|s| s.kind != SectionKind::RelatedCases);

    // Parse Involved sections
    let mut involved = Vec::new();
    for section in &sections {
        if section.kind == SectionKind::Involved {
            let entries = parse_involved(&section.body, section.line, &mut errors);
            involved.extend(entries);
        }
    }
    // Remove Involved from sections list (consumed)
    sections.retain(|s| s.kind != SectionKind::Involved);

    if !errors.is_empty() {
        return Err(errors);
    }

    Ok(ParsedCase {
        id: front_matter.id,
        sources: front_matter.sources,
        title,
        summary,
        sections,
        case_type: front_matter.case_type,
        status: front_matter.status,
        amounts: front_matter.amounts,
        tags: front_matter.tags,
        related_cases,
        involved,
    })
}

/// Parse a standalone entity file (actor or institution).
///
/// Entity files have optional YAML front matter (`id:` and `tags:`), an H1
/// name, and bullet fields directly in the body. No H2 sections are allowed.
///
/// # Errors
///
/// Returns every collected `ParseError` if the front matter, the tags, or the
/// body reported a problem. Tag problems are reported against line 2.
pub fn parse_entity_file(input: &str) -> Result<ParsedEntityFile, Vec<ParseError>> {
    let mut errors = Vec::new();

    let (front_matter, body_start_line, body) = extract_entity_front_matter(input, &mut errors);

    // Absent (or unparsable) front matter means no id and no tags.
    let (id, tags) = match front_matter {
        Some(fm) => (fm.id, fm.tags),
        None => (None, Vec::new()),
    };

    let tag_count = tags.len();
    if tag_count > MAX_ENTITY_TAGS {
        errors.push(ParseError {
            line: 2,
            message: format!(
                "front matter `tags` exceeds {MAX_ENTITY_TAGS} entries (got {tag_count})"
            ),
        });
    }

    // Tags are numbered from 1 in messages.
    for (position, tag) in (1_usize..).zip(&tags) {
        if tag.len() > MAX_TAG_LEN {
            errors.push(ParseError {
                line: 2,
                message: format!("front matter tag #{position} exceeds {MAX_TAG_LEN} chars"),
            });
        }
        if tag.is_empty() {
            errors.push(ParseError {
                line: 2,
                message: format!("front matter tag #{position} is empty"),
            });
        }
    }

    // H1 name plus the raw field lines below it (H2 headings are rejected there).
    let (name, title_line, field_body) = extract_entity_body(&body, body_start_line, &mut errors);

    if errors.is_empty() {
        Ok(ParsedEntityFile {
            id,
            name,
            body: field_body,
            title_line,
            tags,
        })
    } else {
        Err(errors)
    }
}

/// Extract YAML front matter for entity files.
///
/// Front matter is optional here: unless the very first line is `---`, the
/// whole input is returned as the body (starting at line 1) with no error.
///
/// Returns `(front_matter, body_start_line, body)`:
/// - an opening `---` without a closing one records an error and yields
///   `(None, 1, "")`;
/// - YAML that fails to deserialize records an error at line 2 and yields
///   `None`, but the body and its start line are still returned.
fn extract_entity_front_matter(
    input: &str,
    errors: &mut Vec<ParseError>,
) -> (Option<EntityFrontMatter>, usize, String) {
    let lines: Vec<&str> = input.lines().collect();

    let has_front_matter = matches!(lines.first(), Some(first) if first.trim() == "---");
    if !has_front_matter {
        return (None, 1, input.to_string());
    }

    // Index (into `lines`) of the closing delimiter, searched after the opener.
    let closing = lines
        .iter()
        .skip(1)
        .position(|candidate| candidate.trim() == "---")
        .map(|offset| offset + 1);
    let Some(close_line) = closing else {
        errors.push(ParseError {
            line: 1,
            message: "unclosed YAML front matter (missing closing `---`)".into(),
        });
        return (None, 1, String::new());
    };

    let yaml_str = lines[1..close_line].join("\n");
    // `close_line` is 0-based, so the 1-indexed line after the delimiter is +2.
    let body_start_line = close_line + 2;
    let body = lines[close_line + 1..].join("\n");

    let front_matter = match serde_yaml::from_str::<EntityFrontMatter>(&yaml_str) {
        Ok(fm) => Some(fm),
        Err(e) => {
            errors.push(ParseError {
                line: 2,
                message: format!("invalid YAML front matter: {e}"),
            });
            None
        }
    };

    (front_matter, body_start_line, body)
}

/// Extract H1 name and field body from an entity file.
///
/// Returns `(name, title_line, field_body)` where `field_body` is every
/// non-heading line after the first H1, joined with `\n`. `body_start_line`
/// is the file line of the first line of `body`.
///
/// Errors are recorded for: additional H1 headings, any H2 heading,
/// non-blank text before the H1, a missing H1 (name is then empty and
/// `title_line` equals `body_start_line`), and a name longer than
/// `MAX_TITLE_LEN`.
fn extract_entity_body(
    body: &str,
    body_start_line: usize,
    errors: &mut Vec<ParseError>,
) -> (String, usize, String) {
    // The accepted H1 as (name, file line); stays `None` until one is seen.
    let mut heading: Option<(String, usize)> = None;
    let mut field_lines: Vec<&str> = Vec::new();

    for (file_line, line) in (body_start_line..).zip(body.lines()) {
        if let Some(text) = strip_heading(line, 1) {
            if heading.is_some() {
                errors.push(ParseError {
                    line: file_line,
                    message: "multiple H1 headings found (expected exactly one)".into(),
                });
            } else {
                heading = Some((text.to_string(), file_line));
            }
        } else if strip_heading(line, 2).is_some() {
            // Entity files are flat: the heading line is dropped and reported.
            errors.push(ParseError {
                line: file_line,
                message: "H2 sections are not allowed in entity files".into(),
            });
        } else if heading.is_some() {
            field_lines.push(line);
        } else if !line.trim().is_empty() {
            errors.push(ParseError {
                line: file_line,
                message: "expected H1 heading (# Name)".into(),
            });
        }
    }

    let (name, title_line) = match heading {
        Some((name, line)) => {
            if name.len() > MAX_TITLE_LEN {
                errors.push(ParseError {
                    line,
                    message: format!(
                        "H1 name exceeds {MAX_TITLE_LEN} chars (got {})",
                        name.len()
                    ),
                });
            }
            (name, line)
        }
        None => {
            errors.push(ParseError {
                line: body_start_line,
                message: "missing H1 heading".into(),
            });
            (String::new(), body_start_line)
        }
    };

    (name, title_line, field_lines.join("\n"))
}

/// Extract YAML front matter delimited by `---` lines.
///
/// Returns `(front_matter, body_start_line, body)`: the parsed front matter,
/// the 1-indexed line number where the body starts, and the body text.
///
/// The very first line of `input` must be `---` (leading blank lines are not
/// skipped). On failure an error is recorded and `None` is returned:
/// - no opening delimiter: body is the whole input, starting at line 1;
/// - no closing delimiter: body is empty;
/// - YAML that fails to deserialize: reported at line 2, body still returned.
fn extract_front_matter(
    input: &str,
    errors: &mut Vec<ParseError>,
) -> (Option<FrontMatter>, usize, String) {
    /// A delimiter line is `---` with optional surrounding whitespace.
    fn is_delimiter(line: &str) -> bool {
        line.trim() == "---"
    }

    let lines: Vec<&str> = input.lines().collect();

    if !lines.first().is_some_and(|first| is_delimiter(first)) {
        errors.push(ParseError {
            line: 1,
            message: "missing YAML front matter (expected `---` on first line)".into(),
        });
        return (None, 1, input.to_string());
    }

    // Search for the closing delimiter after the opener; the result is an
    // index into `lines`.
    let closing = lines
        .iter()
        .skip(1)
        .position(|candidate| is_delimiter(candidate))
        .map(|offset| offset + 1);
    let Some(close_line) = closing else {
        errors.push(ParseError {
            line: 1,
            message: "unclosed YAML front matter (missing closing `---`)".into(),
        });
        return (None, 1, String::new());
    };

    let yaml_str = lines[1..close_line].join("\n");
    // 0-based index of the closing `---` -> 1-indexed line after it.
    let body_start_line = close_line + 2;
    let body = lines[close_line + 1..].join("\n");

    let front_matter = match serde_yaml::from_str::<FrontMatter>(&yaml_str) {
        Ok(fm) => Some(fm),
        Err(e) => {
            errors.push(ParseError {
                line: 2,
                message: format!("invalid YAML front matter: {e}"),
            });
            None
        }
    };

    (front_matter, body_start_line, body)
}

/// Check the semantic constraints of case front matter.
///
/// Appends one `ParseError` per violation; nothing is returned. Every error is
/// reported against line 2 (the first line inside the front matter block),
/// since field positions are not tracked.
///
/// Checks, in order: `id` length, number of sources, HTTPS scheme of every
/// source URL, `case_type` (known value or accepted by
/// `crate::domain::parse_custom`), `status` (known value), number of tags,
/// and per-tag length / non-emptiness.
fn validate_front_matter(fm: &FrontMatter, errors: &mut Vec<ParseError>) {
    let mut report = |message: String| errors.push(ParseError { line: 2, message });

    // Case ID (NULID), when present, must have exactly the expected length.
    match fm.id.as_deref() {
        Some(id) if id.len() != MAX_CASE_ID_LEN => report(format!(
            "front matter `id` must be a {MAX_CASE_ID_LEN}-char NULID, got {} chars",
            id.len()
        )),
        _ => {}
    }

    let source_count = fm.sources.len();
    if source_count > MAX_SOURCES {
        report(format!(
            "front matter `sources` exceeds {MAX_SOURCES} entries (got {source_count})"
        ));
    }

    for (i, source) in fm.sources.iter().enumerate() {
        let url = source.url();
        if !url.starts_with("https://") {
            report(format!("source[{i}] must be HTTPS, got {url:?}"));
        }
    }

    if let Some(ct) = &fm.case_type {
        use crate::domain::CaseType;
        // Known values are compared lowercased with spaces turned into underscores.
        let normalized = ct.to_lowercase().replace(' ', "_");
        let is_known = CaseType::KNOWN.contains(&normalized.as_str());
        if !is_known && crate::domain::parse_custom(ct).is_none() {
            report(format!(
                "invalid case_type {ct:?} (known: {}; use \"custom:Value\" for custom)",
                CaseType::KNOWN.join(", ")
            ));
        }
    }

    if let Some(st) = &fm.status {
        use crate::domain::CaseStatus;
        let normalized = st.to_lowercase().replace(' ', "_");
        let is_known = CaseStatus::KNOWN.contains(&normalized.as_str());
        if !is_known {
            report(format!(
                "invalid status {st:?} (known: {})",
                CaseStatus::KNOWN.join(", ")
            ));
        }
    }

    let tag_count = fm.tags.len();
    if tag_count > MAX_CASE_TAGS {
        report(format!(
            "front matter `tags` exceeds {MAX_CASE_TAGS} entries (got {tag_count})"
        ));
    }

    for (i, tag) in fm.tags.iter().enumerate() {
        if tag.len() > MAX_TAG_LEN {
            report(format!(
                "tag[{i}] exceeds {MAX_TAG_LEN} chars (got {})",
                tag.len()
            ));
        }
        if tag.is_empty() {
            report(format!("tag[{i}] must not be empty"));
        }
    }
}

/// Extract the H1 title, summary text, and H2 sections from the body.
///
/// `body_start_line` is the 1-indexed file line of the first line of `body`.
/// Returns `(title, summary, sections)`:
/// - `title` is the text of the first H1 (empty if none was found);
/// - `summary` is every non-heading line that follows the H1 before the next
///   H2, joined with `\n` and trimmed;
/// - `sections` holds each accepted H2 section with its raw body.
///
/// Errors are appended to `errors` for: extra H1 headings, non-blank text
/// before the H1, duplicate sections (which are still collected), sections
/// not allowed in case files, unknown sections, a missing or over-long title
/// (the latter reported at the H1 line), and an over-long summary.
///
/// Lines under a rejected H2 (unknown or not allowed) are discarded. They are
/// never folded into the summary or the next accepted section, which would
/// otherwise produce misleading follow-on errors.
#[allow(clippy::too_many_lines)]
fn extract_body(
    body: &str,
    body_start_line: usize,
    errors: &mut Vec<ParseError>,
) -> (String, String, Vec<Section>) {
    let mut title = String::new();
    // File line of the accepted H1; `None` until one is seen.
    let mut title_line: Option<usize> = None;
    let mut summary_lines: Vec<&str> = Vec::new();
    let mut sections: Vec<Section> = Vec::new();

    // The H2 section currently being collected, if any.
    let mut current_section_kind: Option<SectionKind> = None;
    let mut current_section_line: usize = 0;
    let mut current_section_body: Vec<&str> = Vec::new();

    let mut state = State::BeforeTitle;

    for (i, line) in body.lines().enumerate() {
        let file_line = body_start_line + i; // 1-indexed line in original file

        if let Some(heading) = strip_heading(line, 1) {
            if title_line.is_some() {
                errors.push(ParseError {
                    line: file_line,
                    message: "multiple H1 headings found (expected exactly one)".into(),
                });
                continue;
            }
            title = heading.to_string();
            title_line = Some(file_line);
            state = State::Summary;
            continue;
        }

        if let Some(heading) = strip_heading(line, 2) {
            // Any H2 ends the section that was open, whether or not the new
            // heading is accepted.
            if let Some(kind) = current_section_kind.take() {
                sections.push(Section {
                    kind,
                    body: current_section_body.join("\n"),
                    line: current_section_line,
                });
                current_section_body.clear();
            }

            match SectionKind::from_heading(heading) {
                Some(kind) if kind.is_case_section() => {
                    // A duplicate is reported but still collected.
                    if sections.iter().any(|s| s.kind == kind) {
                        errors.push(ParseError {
                            line: file_line,
                            message: format!("duplicate section: ## {heading}"),
                        });
                    }
                    current_section_kind = Some(kind);
                    current_section_line = file_line;
                    state = State::InSection;
                }
                Some(_) => {
                    // Legacy section (People/Organizations) -- not allowed in case files
                    errors.push(ParseError {
                        line: file_line,
                        message: format!(
                            "## {heading} is not allowed in case files (use standalone entity files in people/ or organizations/ instead)"
                        ),
                    });
                    state = State::SkippedSection;
                }
                None => {
                    errors.push(ParseError {
                        line: file_line,
                        message: format!(
                            "unknown section: ## {heading} (expected one of: {})",
                            KNOWN_CASE_SECTIONS.join(", ")
                        ),
                    });
                    state = State::SkippedSection;
                }
            }
            continue;
        }

        match state {
            State::BeforeTitle => {
                // Blank lines before the title are fine; anything else is not.
                if !line.trim().is_empty() {
                    errors.push(ParseError {
                        line: file_line,
                        message: "expected H1 title (# Title)".into(),
                    });
                }
            }
            State::Summary => summary_lines.push(line),
            State::InSection => current_section_body.push(line),
            // Content of a rejected section was already reported at its heading.
            State::SkippedSection => {}
        }
    }

    // Flush the section that was still open at end of input.
    if let Some(kind) = current_section_kind.take() {
        sections.push(Section {
            kind,
            body: current_section_body.join("\n"),
            line: current_section_line,
        });
    }

    match title_line {
        None => errors.push(ParseError {
            line: body_start_line,
            message: "missing H1 title".into(),
        }),
        Some(line) if title.len() > MAX_TITLE_LEN => errors.push(ParseError {
            line,
            message: format!(
                "H1 title exceeds {MAX_TITLE_LEN} chars (got {})",
                title.len()
            ),
        }),
        Some(_) => {}
    }

    // Build summary (trim leading/trailing blank lines)
    let summary = summary_lines.join("\n").trim().to_string();

    if summary.len() > MAX_SUMMARY_LEN {
        errors.push(ParseError {
            line: body_start_line,
            message: format!(
                "summary exceeds {MAX_SUMMARY_LEN} chars (got {})",
                summary.len()
            ),
        });
    }

    (title, summary, sections)
}

/// Position of the body scanner relative to the document structure.
#[derive(Clone, Copy)]
enum State {
    /// No H1 seen yet; only blank lines are acceptable.
    BeforeTitle,
    /// After the H1: lines are collected into the summary.
    Summary,
    /// Inside an accepted H2 section: lines are collected into its body.
    InSection,
    /// Inside a rejected H2 section: lines are discarded.
    SkippedSection,
}

/// Strip an ATX heading prefix of exactly the given level. Returns the heading text.
/// E.g., `strip_heading("## Foo", 2)` returns `Some("Foo")`.
///
/// Leading whitespace before the `#` run is ignored. A line matches only when
/// it starts with exactly `level` `#` characters followed by either the end of
/// the line (yielding `Some("")`) or a space; the returned text is trimmed on
/// both sides. Consequently a deeper heading never matches a shallower level
/// (`### Foo` is not an H2) and `#Foo` without a space is not a heading.
///
/// Heading text may itself begin with `#` (e.g. `# #1 suspect arrested`);
/// only the leading `#` run decides the level.
fn strip_heading(line: &str, level: usize) -> Option<&str> {
    let trimmed = line.trim_start();

    // Count the whole leading `#` run so `###` can never be mistaken for `##`.
    let hashes = trimmed.bytes().take_while(|&b| b == b'#').count();
    if hashes != level {
        return None;
    }

    // `#` is a single ASCII byte, so `level` is always a valid char boundary.
    let after = &trimmed[level..];
    if after.is_empty() {
        return Some("");
    }

    // The marker must be separated from the text by a space.
    after.strip_prefix(' ').map(str::trim)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Unwrap a successful parse result, or panic with every reported error
    /// joined by "; " so a failing test shows all diagnostics at once.
    fn expect_parsed<T, E: ToString>(result: Result<T, Vec<E>>) -> T {
        result.unwrap_or_else(|errs| {
            panic!(
                "parse failed: {}",
                errs.iter()
                    .map(ToString::to_string)
                    .collect::<Vec<_>>()
                    .join("; ")
            );
        })
    }

    /// Smallest case file used by the happy-path test: front matter with an
    /// id and one source, a title, a summary, and two sections.
    fn minimal_case() -> String {
        [
            "---",
            "id: 01H9XT7H1J3929RK32FWSRKV88",
            "sources:",
            "  - https://example.com/source",
            "---",
            "",
            "# Test Case Title",
            "",
            "This is the summary.",
            "",
            "## Events",
            "",
            "### Something happened",
            "- occurred_at: 2025-01-01",
            "",
            "## Relationships",
            "",
            "- Something happened -> Something happened: associate_of",
        ]
        .join("\n")
    }

    /// A minimal case yields the expected id, source, title, summary, and
    /// section kinds in order.
    #[test]
    fn parse_minimal_case() {
        let case = expect_parsed(parse(&minimal_case()));

        assert_eq!(case.id.as_deref(), Some("01H9XT7H1J3929RK32FWSRKV88"));
        assert_eq!(case.sources.len(), 1);
        assert_eq!(case.sources[0].url(), "https://example.com/source");
        assert_eq!(case.title, "Test Case Title");
        assert_eq!(case.summary, "This is the summary.");
        assert_eq!(case.sections.len(), 2);
        assert_eq!(case.sections[0].kind, SectionKind::Events);
        assert_eq!(case.sections[1].kind, SectionKind::Relationships);
    }

    /// A case file without a front matter block is rejected.
    #[test]
    fn parse_missing_front_matter() {
        let input = "# Title\n\nSummary.\n";
        let errs = parse(input).unwrap_err();
        assert!(errs.iter().any(|e| e.message.contains("front matter")));
    }

    /// Front matter that is opened but never closed is rejected.
    #[test]
    fn parse_unclosed_front_matter() {
        let input = "---\nsources: []\n# Title\n";
        let errs = parse(input).unwrap_err();
        assert!(errs.iter().any(|e| e.message.contains("unclosed")));
    }

    /// An id that is too short to be a NULID is rejected.
    #[test]
    fn parse_invalid_case_id_wrong_length() {
        let input = "---\nid: short\nsources: []\n---\n\n# Title\n";
        let errs = parse(input).unwrap_err();
        assert!(errs.iter().any(|e| e.message.contains("NULID")));
    }

    /// Omitting the id is allowed and leaves `id` as `None`.
    #[test]
    fn parse_case_id_absent_is_ok() {
        let input = "---\nsources:\n  - https://example.com\n---\n\n# Title\n\nSummary.\n";
        let case = parse(input).unwrap();
        assert!(case.id.is_none());
    }

    /// A plain `http://` source URL is rejected.
    #[test]
    fn parse_non_https_source() {
        let input = "---\nsources:\n  - http://example.com\n---\n\n# Title\n";
        let errs = parse(input).unwrap_err();
        assert!(errs.iter().any(|e| e.message.contains("HTTPS")));
    }

    /// Twenty-one sources is one more than the limit of 20.
    #[test]
    fn parse_too_many_sources() {
        let sources: Vec<String> = (0..21)
            .map(|i| format!("  - https://example.com/{i}"))
            .collect();
        let input = format!("---\nsources:\n{}\n---\n\n# Title\n", sources.join("\n"));
        let errs = parse(&input).unwrap_err();
        assert!(errs.iter().any(|e| e.message.contains("exceeds 20")));
    }

    /// An H2 heading that is not a known section name is rejected.
    #[test]
    fn parse_unknown_section() {
        let input = [
            "---",
            "sources: []",
            "---",
            "",
            "# Title",
            "",
            "## Unknown Section",
            "",
        ]
        .join("\n");
        let errs = parse(&input).unwrap_err();
        assert!(errs.iter().any(|e| e.message.contains("unknown section")));
    }

    /// The same section appearing twice is rejected.
    #[test]
    fn parse_duplicate_section() {
        let input = [
            "---",
            "sources: []",
            "---",
            "",
            "# Title",
            "",
            "## Events",
            "",
            "## Events",
            "",
        ]
        .join("\n");
        let errs = parse(&input).unwrap_err();
        assert!(errs.iter().any(|e| e.message.contains("duplicate")));
    }

    /// A second H1 heading is rejected.
    #[test]
    fn parse_multiple_h1() {
        let input = [
            "---",
            "sources: []",
            "---",
            "",
            "# First Title",
            "",
            "# Second Title",
            "",
        ]
        .join("\n");
        let errs = parse(&input).unwrap_err();
        assert!(errs.iter().any(|e| e.message.contains("multiple H1")));
    }

    /// Events, Relationships, and Timeline sections are all returned, in
    /// document order.
    #[test]
    fn parse_all_sections() {
        let input = [
            "---",
            "id: 01H9XT7H1KRQ9SJ7SD9ETB5CVQ",
            "sources:",
            "  - https://example.com/a",
            "---",
            "",
            "# Full Case",
            "",
            "Summary text here.",
            "",
            "## Events",
            "",
            "### Something happened",
            "- occurred_at: 2025-01-01",
            "",
            "## Relationships",
            "",
            "- Alice -> Corp Inc: employed_by",
            "",
            "## Timeline",
            "",
            "Something happened",
        ]
        .join("\n");

        let case = expect_parsed(parse(&input));

        assert_eq!(case.id.as_deref(), Some("01H9XT7H1KRQ9SJ7SD9ETB5CVQ"));
        assert_eq!(case.title, "Full Case");
        assert_eq!(case.summary, "Summary text here.");
        assert_eq!(case.sections.len(), 3);
        assert_eq!(case.sections[0].kind, SectionKind::Events);
        assert_eq!(case.sections[1].kind, SectionKind::Relationships);
        assert_eq!(case.sections[2].kind, SectionKind::Timeline);
    }

    /// A title followed directly by a section gives an empty summary.
    #[test]
    fn parse_empty_summary() {
        let input = [
            "---",
            "sources: []",
            "---",
            "",
            "# Title",
            "",
            "## Events",
            "",
        ]
        .join("\n");

        let case = expect_parsed(parse(&input));
        assert_eq!(case.summary, "");
    }

    /// Consecutive summary lines are kept, joined by a newline, with the
    /// surrounding blank lines trimmed away.
    #[test]
    fn parse_multiline_summary() {
        let input = [
            "---",
            "sources: []",
            "---",
            "",
            "# Title",
            "",
            "First line of summary.",
            "Second line of summary.",
            "",
            "## Events",
            "",
        ]
        .join("\n");

        let case = expect_parsed(parse(&input));
        assert_eq!(
            case.summary,
            "First line of summary.\nSecond line of summary."
        );
    }

    /// `strip_heading` matches only the exact heading level requested.
    #[test]
    fn strip_heading_levels() {
        assert_eq!(strip_heading("# Title", 1), Some("Title"));
        assert_eq!(strip_heading("## Section", 2), Some("Section"));
        assert_eq!(strip_heading("### Entity", 3), Some("Entity"));
        // H3 should not match H2
        assert_eq!(strip_heading("### Entity", 2), None);
        // H2 should not match H1
        assert_eq!(strip_heading("## Section", 1), None);
        // Not a heading
        assert_eq!(strip_heading("Normal text", 1), None);
        // Bare marker with no text is an empty heading
        assert_eq!(strip_heading("##", 2), Some(""));
        // Leading indentation is ignored and the text is trimmed
        assert_eq!(strip_heading("  ## Indented  ", 2), Some("Indented"));
        // A space is required between the marker and the text
        assert_eq!(strip_heading("##NoSpace", 2), None);
    }

    /// H3 headings and bullet lines inside a section are preserved in the
    /// section body.
    #[test]
    fn section_body_content() {
        let input = [
            "---",
            "sources: []",
            "---",
            "",
            "# Title",
            "",
            "## Events",
            "",
            "### Bonnick dismissal",
            "- occurred_at: 2024-12-24",
            "- type: termination",
            "",
        ]
        .join("\n");

        let case = expect_parsed(parse(&input));

        assert_eq!(case.sections.len(), 1);
        let body = &case.sections[0].body;
        assert!(body.contains("### Bonnick dismissal"));
        assert!(body.contains("- occurred_at: 2024-12-24"));
    }

    /// A `## People` section is rejected in case files.
    #[test]
    fn parse_rejects_people_section_in_case_file() {
        let input = [
            "---",
            "sources: []",
            "---",
            "",
            "# Title",
            "",
            "## People",
            "",
        ]
        .join("\n");
        let errs = parse(&input).unwrap_err();
        assert!(
            errs.iter()
                .any(|e| e.message.contains("not allowed in case files"))
        );
    }

    /// A `## Organizations` section is rejected in case files.
    #[test]
    fn parse_rejects_organizations_section_in_case_file() {
        let input = [
            "---",
            "sources: []",
            "---",
            "",
            "# Title",
            "",
            "## Organizations",
            "",
        ]
        .join("\n");
        let errs = parse(&input).unwrap_err();
        assert!(
            errs.iter()
                .any(|e| e.message.contains("not allowed in case files"))
        );
    }

    /// An entity file with an id in its front matter exposes the id, the H1
    /// as the name, and the bullet lines in the body.
    #[test]
    fn parse_entity_file_with_id() {
        let input = [
            "---",
            "id: 01JXYZ123456789ABCDEFGHIJK",
            "---",
            "",
            "# Mark Bonnick",
            "",
            "- qualifier: Arsenal Kit Manager",
            "- nationality: British",
            "",
        ]
        .join("\n");

        let result = parse_entity_file(&input).unwrap();
        assert_eq!(result.id.as_deref(), Some("01JXYZ123456789ABCDEFGHIJK"));
        assert_eq!(result.name, "Mark Bonnick");
        assert!(result.body.contains("- qualifier: Arsenal Kit Manager"));
        assert!(result.body.contains("- nationality: British"));
    }

    /// An entity file with empty front matter parses with no id.
    #[test]
    fn parse_entity_file_without_id() {
        let input = [
            "---",
            "---",
            "",
            "# Arsenal FC",
            "",
            "- qualifier: English Football Club",
            "- org_type: sports_club",
            "",
        ]
        .join("\n");

        let result = parse_entity_file(&input).unwrap();
        assert!(result.id.is_none());
        assert_eq!(result.name, "Arsenal FC");
    }

    /// An entity file may omit the front matter block entirely.
    #[test]
    fn parse_entity_file_no_front_matter() {
        let input = ["# Bob Smith", "", "- nationality: Dutch", ""].join("\n");

        let result = parse_entity_file(&input).unwrap();
        assert!(result.id.is_none());
        assert_eq!(result.name, "Bob Smith");
        assert!(result.body.contains("- nationality: Dutch"));
    }

    /// H2 sections are rejected in entity files.
    #[test]
    fn parse_entity_file_rejects_h2_sections() {
        let input = [
            "---",
            "---",
            "",
            "# Test Entity",
            "",
            "## Relationships",
            "",
        ]
        .join("\n");

        let errs = parse_entity_file(&input).unwrap_err();
        assert!(errs.iter().any(|e| e.message.contains("H2 sections")));
    }

    /// An entity file without an H1 is rejected.
    #[test]
    fn parse_entity_file_missing_h1() {
        let input = ["---", "---", "", "- nationality: Dutch", ""].join("\n");

        let errs = parse_entity_file(&input).unwrap_err();
        assert!(errs.iter().any(|e| e.message.contains("missing H1")));
    }

    /// `## Related Cases` entries are parsed into `related_cases` and the
    /// section itself is not left in `sections`.
    #[test]
    fn parse_related_cases_section() {
        let input = [
            "---",
            "tags: [bribery]",
            "sources:",
            "  - https://example.com",
            "---",
            "",
            "# Test Case",
            "",
            "Summary text.",
            "",
            "## Related Cases",
            "",
            "- id/corruption/2002/blbi-liquidity-aid-scandal",
            "  description: Artalyta bribed Urip to influence the BLBI investigation",
            "- id/corruption/2008/another-case",
            "  description: A second related case",
        ]
        .join("\n");

        let case = expect_parsed(parse(&input));

        assert_eq!(case.related_cases.len(), 2);
        assert_eq!(
            case.related_cases[0].case_path,
            "id/corruption/2002/blbi-liquidity-aid-scandal"
        );
        assert_eq!(
            case.related_cases[0].description,
            "Artalyta bribed Urip to influence the BLBI investigation"
        );
        assert_eq!(
            case.related_cases[1].case_path,
            "id/corruption/2008/another-case"
        );
        assert_eq!(case.related_cases[1].description, "A second related case");
        // RelatedCases should be consumed and NOT appear in sections
        assert!(
            !case
                .sections
                .iter()
                .any(|s| s.kind == SectionKind::RelatedCases)
        );
    }

    /// A related-case bullet with no path is rejected.
    #[test]
    fn parse_related_cases_empty_path() {
        let input = [
            "---",
            "sources: []",
            "---",
            "",
            "# Title",
            "",
            "## Related Cases",
            "",
            "- ",
            "  description: Some description",
        ]
        .join("\n");

        let errs = parse(&input).unwrap_err();
        assert!(
            errs.iter()
                .any(|e| e.message.contains("case path must not be empty"))
        );
    }

    /// A related-case entry without a description line is rejected.
    #[test]
    fn parse_related_cases_missing_description() {
        let input = [
            "---",
            "sources: []",
            "---",
            "",
            "# Title",
            "",
            "## Related Cases",
            "",
            "- id/corruption/2002/some-case",
        ]
        .join("\n");

        let errs = parse(&input).unwrap_err();
        assert!(errs.iter().any(|e| e.message.contains("description")));
    }

    /// A 501-character description is one more than the limit of 500.
    #[test]
    fn parse_related_cases_description_too_long() {
        let long_desc = "x".repeat(501);
        let input = [
            "---",
            "sources: []",
            "---",
            "",
            "# Title",
            "",
            "## Related Cases",
            "",
            "- id/corruption/2002/some-case",
            &format!("  description: {long_desc}"),
        ]
        .join("\n");

        let errs = parse(&input).unwrap_err();
        assert!(errs.iter().any(|e| e.message.contains("exceeds 500")));
    }

    /// Eleven related cases is one more than the limit of 10.
    #[test]
    fn parse_related_cases_too_many() {
        let mut lines = vec![
            "---".to_string(),
            "sources: []".to_string(),
            "---".to_string(),
            String::new(),
            "# Title".to_string(),
            String::new(),
            "## Related Cases".to_string(),
            String::new(),
        ];
        for i in 0..11 {
            lines.push(format!("- id/corruption/2002/case-{i}"));
            lines.push(format!("  description: Description {i}"));
        }
        let input = lines.join("\n");

        let errs = parse(&input).unwrap_err();
        assert!(errs.iter().any(|e| e.message.contains("exceeds 10")));
    }
}