// corpora-engine 0.1.0

//! Real TOML front-matter + markdown body parsing (plan Phase 2).
//!
//! A corpus document is a `+++`-fenced TOML header followed by a markdown body:
//!
//! ```text
//! +++
//! id = "D2"
//! kind = "decision"
//! ...
//! +++
//!
//! markdown body…
//! ```
//!
//! The header maps to a [`Record`] (kind → [`Facet`], with field-level errors collected
//! and joined into one [`ParseError`]); the body is scanned with `pulldown-cmark` for
//! bare-id mentions (skipping code), `§` section refs, links, and finding anchors.

use std::collections::BTreeSet;

use corpora_core::{
    Authority, Body, Date, DecisionFacet, DocPath, Door, Edges, Facet, Finding, FindingId, Fork,
    Id, Impl, Kind, Lifecycle, ParseError, Record, Rev, Status,
};
use pulldown_cmark::{Event, Parser as MdParser, Tag, TagEnd};

use crate::parser::FrontMatterFormat;

/// Front-matter format for `+++`-fenced TOML headers. Its [`FrontMatterFormat`] impl turns
/// the text of one document into a [`Record`].
pub struct TomlFormat;

impl FrontMatterFormat for TomlFormat {
    fn parse(&self, path: &DocPath, text: &str) -> Result<Record, ParseError> {
        let (fm_src, body_src) = split_front_matter(text)
            .ok_or_else(|| ParseError("missing `+++` TOML front matter".into()))?;

        let table: toml::Table = fm_src
            .parse()
            .map_err(|e| ParseError(format!("invalid TOML front matter: {e}")))?;

        let mut errs: Vec<String> = Vec::new();

        // `kind` is load-bearing (it picks the facet); without it we can't build a record.
        let kind = match table.get("kind").and_then(|v| v.as_str()).and_then(parse_kind) {
            Some(k) => k,
            None => return Err(ParseError("missing or invalid `kind`".into())),
        };

        let lifecycle =
            require_enum(&table, "lifecycle", parse_lifecycle, &mut errs).unwrap_or(Lifecycle::Draft);
        let authority =
            require_enum(&table, "authority", parse_authority, &mut errs).unwrap_or(Authority::Normative);
        let last_reviewed = match get_date(&table, "last_reviewed", &mut errs) {
            Some(d) => Date(d),
            None => {
                if !table.contains_key("last_reviewed") {
                    errs.push("missing `last_reviewed`".into());
                }
                Date(String::new())
            }
        };

        let facet = build_facet(kind, &table, &mut errs);

        // schema-v0 §4: `id` is common to every record. Without it a record is invisible to
        // graph indexing, supersession, and E3 — so a missing id is an error, not allowed.
        let id = get_str(&table, "id", &mut errs).map(Id);
        if id.is_none() && !table.contains_key("id") {
            errs.push("missing `id`".into());
        }
        let aka: Vec<Id> = get_ids(&table, "aka", &mut errs);
        let edges = Edges {
            depends_on: get_ids(&table, "depends_on", &mut errs),
            supersedes: get_ids(&table, "supersedes", &mut errs),
            related: get_ids(&table, "related", &mut errs),
            supports: get_ids(&table, "supports", &mut errs),
            driven_by: get_array_str(&table, "driven_by", &mut errs)
                .into_iter()
                .map(FindingId)
                .collect(),
        };

        // Anything already cited via a structured edge (or the record's own id/aka) is not a
        // "bare" mention — exclude it so E3 doesn't double-flag.
        let mut exclude: BTreeSet<String> = BTreeSet::new();
        if let Some(i) = &id {
            exclude.insert(i.0.clone());
        }
        exclude.extend(aka.iter().map(|i| i.0.clone()));
        for e in edges
            .depends_on
            .iter()
            .chain(&edges.supersedes)
            .chain(&edges.related)
            .chain(&edges.supports)
        {
            exclude.insert(e.0.clone());
        }
        let body = extract_body(body_src, &exclude);

        if !errs.is_empty() {
            return Err(ParseError(errs.join("; ")));
        }

        Ok(Record {
            id,
            path: path.clone(),
            kind,
            lifecycle,
            authority,
            last_reviewed,
            aka,
            edges,
            facet,
            body,
        })
    }
}

fn build_facet(kind: Kind, t: &toml::Table, errs: &mut Vec<String>) -> Facet {
    match kind {
        Kind::Decision => {
            let status = require_enum(t, "status", parse_status, errs).unwrap_or(Status::Proposed);
            let date = match get_date(t, "date", errs) {
                Some(d) => Date(d),
                None => {
                    if !t.contains_key("date") {
                        errs.push("decision missing `date`".into());
                    }
                    Date(String::new())
                }
            };
            let implementation = opt_enum(t, "implementation", parse_impl, errs);
            let fork = if status == Status::Open {
                match (
                    get_str(t, "lean", errs),
                    get_str(t, "decide_when", errs),
                    opt_enum(t, "door", parse_door, errs),
                ) {
                    (Some(lean), Some(decide_when), Some(door)) => Some(Fork {
                        lean,
                        decide_when,
                        door,
                    }),
                    _ => {
                        errs.push("open decision requires `lean`, `decide_when`, `door`".into());
                        None
                    }
                }
            } else {
                None
            };
            Facet::Decision(DecisionFacet {
                status,
                date,
                implementation,
                fork,
                realized_by: get_ids(t, "realized_by", errs),
            })
        }
        Kind::Axiom => {
            // An axiom is an assumed external fact: it carries no code (schema-v0 §4).
            for forbidden in ["implementation", "code_revision"] {
                if t.contains_key(forbidden) {
                    errs.push(format!("axiom must not carry `{forbidden}`"));
                }
            }
            Facet::Axiom
        }
        Kind::Invariant | Kind::Architecture => {
            let implementation = opt_enum(t, "implementation", parse_impl, errs);
            let code_revision = get_str(t, "code_revision", errs).map(Rev);
            if implementation.is_some() && code_revision.is_none() && !t.contains_key("code_revision") {
                errs.push("`implementation` present requires `code_revision`".into());
            }
            Facet::Canon {
                implementation,
                code_revision,
            }
        }
        Kind::Current => Facet::Current {
            implementation: require_enum(t, "implementation", parse_impl, errs).unwrap_or(Impl::Absent),
            code_revision: require_rev(t, "code_revision", errs),
            source_revision: get_str(t, "source_revision", errs).map(Rev),
        },
        Kind::Roadmap | Kind::Milestone => Facet::Plan {
            implementation: require_enum(t, "implementation", parse_impl, errs).unwrap_or(Impl::Absent),
            code_revision: require_rev(t, "code_revision", errs),
        },
        Kind::Evidence => {
            let imp = opt_enum(t, "implementation", parse_impl, errs);
            let rev = get_str(t, "code_revision", errs).map(Rev);
            let measured = match (imp, rev) {
                (Some(i), Some(r)) => Some((i, r)),
                (None, None) => None,
                _ => {
                    errs.push("evidence `implementation` and `code_revision` must be paired".into());
                    None
                }
            };
            Facet::Evidence {
                measured,
                source_revision: get_str(t, "source_revision", errs).map(Rev),
            }
        }
        Kind::ReviewLog | Kind::Evolution | Kind::Handoff | Kind::Explainer | Kind::Index => {
            Facet::Narrative
        }
    }
}

// ---- front-matter helpers -------------------------------------------------
//
// Every accessor is type-strict: a key that is *present but the wrong type* is an error,
// not silently treated as absent. Otherwise `supersedes = "D1"` (a string, not an array)
// would quietly drop a graph edge, and a wrong-typed enum would read as missing.

/// Read an optional string field. An absent key yields `None` silently; a key holding any
/// non-string value yields `None` and records a type error in `errs`.
fn get_str(t: &toml::Table, key: &str, errs: &mut Vec<String>) -> Option<String> {
    let value = t.get(key)?;
    if let toml::Value::String(text) = value {
        return Some(text.clone());
    }
    errs.push(format!("`{key}` must be a string"));
    None
}

/// Read an optional array-of-strings field. An absent key yields an empty vector; a
/// non-array value is an error; each non-string element is reported by index and skipped,
/// while the string elements around it are still returned.
fn get_array_str(t: &toml::Table, key: &str, errs: &mut Vec<String>) -> Vec<String> {
    let Some(value) = t.get(key) else {
        return Vec::new();
    };
    let toml::Value::Array(items) = value else {
        errs.push(format!("`{key}` must be an array of strings"));
        return Vec::new();
    };

    let mut strings = Vec::with_capacity(items.len());
    for (index, item) in items.iter().enumerate() {
        if let toml::Value::String(text) = item {
            strings.push(text.clone());
        } else {
            errs.push(format!("`{key}[{index}]` must be a string"));
        }
    }
    strings
}

/// Read an optional array-of-strings field and wrap each element as an [`Id`].
fn get_ids(t: &toml::Table, key: &str, errs: &mut Vec<String>) -> Vec<Id> {
    let raw = get_array_str(t, key, errs);
    raw.into_iter().map(Id).collect()
}

/// Read an optional date field as a `YYYY-MM-DD` string.
///
/// Two spellings are accepted: a bare TOML local date, or a string that `is_iso_date`
/// accepts. A TOML date-time (anything carrying a time or offset), any other string, and
/// any other value type each push a specific error and yield `None`. An absent key yields
/// `None` without an error.
fn get_date(t: &toml::Table, key: &str, errs: &mut Vec<String>) -> Option<String> {
    let value = t.get(key)?;
    let problem = match value {
        toml::Value::Datetime(d) => {
            if let (Some(date), None, None) = (&d.date, &d.time, &d.offset) {
                return Some(format!("{:04}-{:02}-{:02}", date.year, date.month, date.day));
            }
            format!("`{key}` must be a plain date (YYYY-MM-DD), not a date-time")
        }
        toml::Value::String(s) => {
            if is_iso_date(s) {
                return Some(s.clone());
            }
            format!("`{key}` must be a date in YYYY-MM-DD form, got {s:?}")
        }
        _ => format!("`{key}` must be a date"),
    };
    errs.push(problem);
    None
}

/// True when `s` is exactly `YYYY-MM-DD` (ASCII digits and dashes) *and* names a real
/// calendar day — so `2026-99-99` and `2026-02-30` are rejected along with mis-shaped text.
fn is_iso_date(s: &str) -> bool {
    let bytes = s.as_bytes();
    if bytes.len() != 10 {
        return false;
    }
    // Positions 4 and 7 hold the dashes; every other position must be a digit.
    let well_formed = bytes.iter().enumerate().all(|(pos, byte)| match pos {
        4 | 7 => *byte == b'-',
        _ => byte.is_ascii_digit(),
    });
    if !well_formed {
        return false;
    }

    // All ten bytes are ASCII at this point, so these byte ranges are valid str slices.
    let field = |range: std::ops::Range<usize>| s[range].parse::<u32>().unwrap_or(0);
    let year = field(0..4);
    let month = field(5..7);
    let day = field(8..10);

    if !(1..=12).contains(&month) {
        return false;
    }
    (1..=days_in_month(year, month)).contains(&day)
}

/// Number of days in `month` (1–12) of `year` using Gregorian leap-year rules; returns 0
/// for a month outside 1–12.
fn days_in_month(year: u32, month: u32) -> u32 {
    let leap = year % 400 == 0 || (year % 4 == 0 && year % 100 != 0);
    match month {
        2 if leap => 29,
        2 => 28,
        4 | 6 | 9 | 11 => 30,
        1..=12 => 31,
        _ => 0,
    }
}

/// Read a string field that must be present. A wrong-typed value is reported by `get_str`;
/// an absent key is reported here as missing. Either way the result is `None`.
fn require_str(t: &toml::Table, key: &str, errs: &mut Vec<String>) -> Option<String> {
    match get_str(t, key, errs) {
        Some(value) => Some(value),
        None => {
            // Avoid a second error when the key exists but had the wrong type.
            if t.get(key).is_none() {
                errs.push(format!("missing `{key}`"));
            }
            None
        }
    }
}

/// Read a required revision string as a [`Rev`]. On failure the error is recorded by
/// `require_str` and an empty `Rev` is returned as a placeholder.
fn require_rev(t: &toml::Table, key: &str, errs: &mut Vec<String>) -> Rev {
    match require_str(t, key, errs) {
        Some(revision) => Rev(revision),
        None => Rev(String::new()),
    }
}

fn require_enum<T>(
    t: &toml::Table,
    key: &str,
    f: impl Fn(&str) -> Option<T>,
    errs: &mut Vec<String>,
) -> Option<T> {
    match t.get(key) {
        None => {
            errs.push(format!("missing `{key}`"));
            None
        }
        Some(toml::Value::String(s)) => match f(s) {
            Some(v) => Some(v),
            None => {
                errs.push(format!("invalid `{key}` = {s:?}"));
                None
            }
        },
        Some(_) => {
            errs.push(format!("`{key}` must be a string"));
            None
        }
    }
}

/// Read an optional enum-valued string field, converting it with `f`.
///
/// An absent key yields `None` silently. A non-string value, or a string that `f` does not
/// recognise, records an error in `errs` and yields `None`.
fn opt_enum<T>(
    t: &toml::Table,
    key: &str,
    f: impl Fn(&str) -> Option<T>,
    errs: &mut Vec<String>,
) -> Option<T> {
    let raw = match t.get(key)? {
        toml::Value::String(s) => s,
        _ => {
            errs.push(format!("`{key}` must be a string"));
            return None;
        }
    };
    let parsed = f(raw);
    if parsed.is_none() {
        errs.push(format!("invalid `{key}` = {raw:?}"));
    }
    parsed
}

/// Map a header `kind` string to its [`Kind`]; `None` for an unrecognised value.
fn parse_kind(s: &str) -> Option<Kind> {
    match s {
        "decision" => Some(Kind::Decision),
        "axiom" => Some(Kind::Axiom),
        "invariant" => Some(Kind::Invariant),
        "architecture" => Some(Kind::Architecture),
        "current" => Some(Kind::Current),
        "roadmap" => Some(Kind::Roadmap),
        "milestone" => Some(Kind::Milestone),
        "evidence" => Some(Kind::Evidence),
        "review-log" => Some(Kind::ReviewLog),
        "evolution" => Some(Kind::Evolution),
        "handoff" => Some(Kind::Handoff),
        "explainer" => Some(Kind::Explainer),
        "index" => Some(Kind::Index),
        _ => None,
    }
}

/// Map a header `lifecycle` string to its [`Lifecycle`]; `None` for an unrecognised value.
fn parse_lifecycle(s: &str) -> Option<Lifecycle> {
    match s {
        "draft" => Some(Lifecycle::Draft),
        "current" => Some(Lifecycle::Current),
        "superseded" => Some(Lifecycle::Superseded),
        "historical" => Some(Lifecycle::Historical),
        _ => None,
    }
}

/// Map a header `authority` string to its [`Authority`]; `None` for an unrecognised value.
fn parse_authority(s: &str) -> Option<Authority> {
    match s {
        "normative" => Some(Authority::Normative),
        "axiomatic" => Some(Authority::Axiomatic),
        "descriptive" => Some(Authority::Descriptive),
        "prospective" => Some(Authority::Prospective),
        "evidence" => Some(Authority::Evidence),
        "historical" => Some(Authority::Historical),
        "operational" => Some(Authority::Operational),
        "explanatory" => Some(Authority::Explanatory),
        "navigational" => Some(Authority::Navigational),
        _ => None,
    }
}

/// Map a decision `status` string to its [`Status`]; `None` for an unrecognised value.
fn parse_status(s: &str) -> Option<Status> {
    match s {
        "open" => Some(Status::Open),
        "proposed" => Some(Status::Proposed),
        "accepted" => Some(Status::Accepted),
        "superseded" => Some(Status::Superseded),
        "deprecated" => Some(Status::Deprecated),
        "rejected" => Some(Status::Rejected),
        _ => None,
    }
}

/// Map an `implementation` string to its [`Impl`] level; `None` for an unrecognised value.
fn parse_impl(s: &str) -> Option<Impl> {
    match s {
        "absent" => Some(Impl::Absent),
        "scaffold" => Some(Impl::Scaffold),
        "partial" => Some(Impl::Partial),
        "implemented" => Some(Impl::Implemented),
        "verified" => Some(Impl::Verified),
        _ => None,
    }
}

/// Map a fork `door` string to its [`Door`]; `None` for an unrecognised value.
fn parse_door(s: &str) -> Option<Door> {
    match s {
        "reversible" => Some(Door::Reversible),
        "one-way" => Some(Door::OneWay),
        _ => None,
    }
}

/// Separate the `+++`-fenced TOML header from the markdown that follows it.
///
/// A leading BOM and leading blank lines are skipped. The opening fence must be `+++`
/// immediately followed by a line break. The closing fence is the first `+++` that sits
/// alone on its own line (LF or CRLF, or at end of input). Returns `(front_matter, body)`,
/// where the body starts after the closing fence's line break, or `None` when the document
/// is not fenced.
fn split_front_matter(text: &str) -> Option<(&str, &str)> {
    let text = text.strip_prefix('\u{feff}').unwrap_or(text); // drop a BOM
    let text = text.trim_start_matches(['\r', '\n']);
    let opened = text.strip_prefix("+++")?;
    let rest = match opened.strip_prefix("\r\n") {
        Some(after_crlf) => after_crlf,
        None => opened.strip_prefix('\n')?,
    };

    for (at, _) in rest.match_indices("+++") {
        let starts_line = at == 0 || rest.as_bytes()[at - 1] == b'\n';
        if !starts_line {
            continue;
        }
        // Tolerate a CR before the LF that ends the fence line.
        let tail = &rest[at + 3..];
        let tail = tail.strip_prefix('\r').unwrap_or(tail);
        if tail.is_empty() {
            return Some((&rest[..at], tail));
        }
        if let Some(body) = tail.strip_prefix('\n') {
            return Some((&rest[..at], body));
        }
    }
    None
}

// ---- body extraction ------------------------------------------------------

/// Scan the markdown body and collect what the graph needs from it.
///
/// * bare id mentions and `§` section refs come from text outside code blocks (inline code
///   is a separate event and is never scanned);
/// * link destinations are recorded as-is, de-duplicated in first-seen order;
/// * finding anchors come from raw HTML;
/// * ids named by *local* link destinations become `link_refs` and are removed from the
///   bare mentions.
///
/// Ids listed in `exclude` (the record's own id/aka and its structured edges) are never
/// reported as bare mentions or link refs.
fn extract_body(src: &str, exclude: &BTreeSet<String>) -> Body {
    let mut bare = Vec::new();
    let mut seen_bare: BTreeSet<String> = BTreeSet::new();
    let mut findings = Vec::new();
    let mut seen_find: BTreeSet<String> = BTreeSet::new();
    let mut section_refs = Vec::new();
    let mut links = Vec::new();
    let mut code_depth = 0usize;
    // The markdown parser may hand one HTML block over as several consecutive `Html`
    // events. Scanning each piece alone would miss a `<!-- … -->` comment that spans
    // lines, letting commented-out anchors through — so the pieces of a block are joined
    // and scanned once, when the run of `Html` events ends.
    let mut html_block = String::new();

    for ev in MdParser::new(src) {
        if let Event::Html(chunk) = &ev {
            html_block.push_str(chunk);
            continue;
        }
        if !html_block.is_empty() {
            scan_anchors(&html_block, &mut findings, &mut seen_find);
            html_block.clear();
        }
        match ev {
            Event::Start(Tag::CodeBlock(_)) => code_depth += 1,
            Event::End(TagEnd::CodeBlock) => code_depth = code_depth.saturating_sub(1),
            Event::Text(t) if code_depth == 0 => {
                scan_ids(&t, exclude, &mut bare, &mut seen_bare);
                scan_sections(&t, &mut section_refs);
            }
            Event::Start(Tag::Link { dest_url, .. }) => links.push(dest_url.to_string()),
            Event::InlineHtml(h) => scan_anchors(&h, &mut findings, &mut seen_find),
            _ => {} // inline `Code`, code-block text, soft breaks, etc. are ignored
        }
    }
    // An HTML block that runs to the very end of the document has not been flushed yet.
    if !html_block.is_empty() {
        scan_anchors(&html_block, &mut findings, &mut seen_find);
    }

    dedup_preserve(&mut section_refs);
    dedup_preserve(&mut links);

    // Ids named by *local* link destinations are typed (Relation::Link) citations, not bare
    // mentions — pull them out so E3 treats a link to a superseded atom as an error. External
    // URLs are skipped, so `https://example.com/D1` can't masquerade as a citation to D1.
    let mut link_refs = Vec::new();
    let mut seen_link: BTreeSet<String> = BTreeSet::new();
    for dest in links.iter().filter(|d| is_local_link(d)) {
        for tok in tokenize(dest) {
            if is_id_shape(tok) && !exclude.contains(tok) && seen_link.insert(tok.to_string()) {
                link_refs.push(Id(tok.to_string()));
            }
        }
    }
    bare.retain(|i| !seen_link.contains(&i.0));

    Body {
        bare_mentions: bare,
        findings,
        section_refs,
        links,
        link_refs,
    }
}

fn scan_ids(text: &str, exclude: &BTreeSet<String>, out: &mut Vec<Id>, seen: &mut BTreeSet<String>) {
    for tok in tokenize(text) {
        if is_id_shape(tok) && !exclude.contains(tok) && seen.insert(tok.to_string()) {
            out.push(Id(tok.to_string()));
        }
    }
}

/// Append every `§` section reference found in `text` to `out` (duplicates included).
///
/// A section number is one or more digit groups joined by single dots. A dot belongs to
/// the ref only when it sits *between* digits, so neither a trailing prose dot (`§9.6.`)
/// nor a leading one (`§.5`) leaks into the result. A `§` with no number after it is
/// ignored.
fn scan_sections(text: &str, out: &mut Vec<String>) {
    let mut chars = text.chars().peekable();
    while let Some(c) = chars.next() {
        if c != '§' {
            continue;
        }
        let mut number = String::new();
        loop {
            match chars.peek().copied() {
                Some(d) if d.is_ascii_digit() => {
                    number.push(d);
                    chars.next();
                }
                // A dot must follow a digit already read and be followed by another digit.
                Some('.') if !number.is_empty() => {
                    let digit_follows = chars.clone().nth(1).is_some_and(|n| n.is_ascii_digit());
                    if !digit_follows {
                        break;
                    }
                    number.push('.');
                    chars.next();
                }
                _ => break,
            }
        }
        if !number.is_empty() {
            out.push(number);
        }
    }
}

/// Return `html` with every `<!-- … -->` span removed, so anchors that are commented out in
/// the source are not scanned. A comment that is never closed swallows the rest of the
/// fragment (matching HTML behaviour).
fn strip_html_comments(html: &str) -> String {
    let mut kept = String::with_capacity(html.len());
    let mut tail = html;
    loop {
        let Some((before, after_open)) = tail.split_once("<!--") else {
            // No further comment: whatever remains is kept verbatim.
            kept.push_str(tail);
            return kept;
        };
        kept.push_str(before);
        match after_open.split_once("-->") {
            Some((_, after_close)) => tail = after_close,
            None => return kept,
        }
    }
}

/// Collect findings from the `<a id="…">` anchors in an HTML fragment.
///
/// Only genuine `<a` tags count — not `<div id=…>`, `<article …>`, `data-id=`, or anything
/// inside an HTML comment. An anchor becomes a [`Finding`] when its id has the finding shape
/// and has not been recorded in `seen` before.
fn scan_anchors(html: &str, out: &mut Vec<Finding>, seen: &mut BTreeSet<String>) {
    let cleaned = strip_html_comments(html);
    // ASCII lowercasing keeps byte offsets aligned between `folded` and `cleaned`.
    let folded = cleaned.to_ascii_lowercase();
    let bytes = folded.as_bytes();
    let mut cursor = 0;
    while let Some(offset) = folded[cursor..].find("<a") {
        let tag_start = cursor + offset;
        let name_end = tag_start + 2;
        // Genuine <a tag: "<a" followed by a delimiter (or end of input), not e.g. "<article".
        let delimited = bytes
            .get(name_end)
            .map_or(true, |&b| matches!(b, b' ' | b'\t' | b'\n' | b'\r' | b'>' | b'/'));
        if !delimited {
            cursor = name_end;
            continue;
        }
        let tag_end = folded[tag_start..]
            .find('>')
            .map_or(bytes.len(), |e| tag_start + e);
        match anchor_id(&cleaned[tag_start..tag_end]) {
            Some(val) if is_finding_shape(val) && seen.insert(val.to_string()) => {
                out.push(Finding {
                    id: FindingId(val.to_string()),
                    status: String::new(), // inline status parsing is Phase 3/4
                });
            }
            _ => {}
        }
        cursor = tag_end;
    }
}

/// Extract the value of the `id` attribute from one opening-tag slice.
///
/// `id` is matched case-insensitively and must be a whole attribute name, so `data-id` and
/// `grid` do not count. Spaces or tabs are allowed around `=`, and the value must be wrapped
/// in single or double quotes. The returned slice borrows from `tag` with its original case.
fn anchor_id(tag: &str) -> Option<&str> {
    // ASCII lowercasing keeps byte offsets aligned between `folded` and `tag`.
    let folded = tag.to_ascii_lowercase();
    let bytes = folded.as_bytes();
    let skip_blanks = |mut pos: usize| -> usize {
        while matches!(bytes.get(pos).copied(), Some(b' ' | b'\t')) {
            pos += 1;
        }
        pos
    };

    for (at, _) in folded.match_indices("id") {
        // Reject `id` that is merely the tail of a longer attribute name.
        let glued_to_name = at > 0 && {
            let prev = bytes[at - 1];
            prev.is_ascii_alphanumeric() || prev == b'-' || prev == b'_'
        };
        if glued_to_name {
            continue;
        }
        let eq = skip_blanks(at + 2);
        if bytes.get(eq).copied() != Some(b'=') {
            continue;
        }
        let open = skip_blanks(eq + 1);
        let quote = match bytes.get(open).copied() {
            Some(q @ (b'"' | b'\'')) => q as char,
            _ => continue,
        };
        let value_start = open + 1;
        if let Some(len) = folded[value_start..].find(quote) {
            return Some(&tag[value_start..value_start + len]);
        }
    }
    None
}

/// Break `text` into maximal runs of ASCII alphanumerics and `-`. Every other character
/// (including any non-ASCII one) is a separator; empty runs are dropped.
fn tokenize(text: &str) -> Vec<&str> {
    text.split(|c: char| !(c.is_ascii_alphanumeric() || c == '-'))
        .filter(|run| !run.is_empty())
        .collect()
}

/// Record-id shape: one or more ASCII uppercase letters, an optional single `-`, then one
/// or more ASCII digits, and nothing else (e.g. `D2`, `F10`, `CD-3`).
fn is_id_shape(s: &str) -> bool {
    let letters = s.bytes().take_while(u8::is_ascii_uppercase).count();
    if letters == 0 {
        return false;
    }
    // The prefix is pure ASCII, so `letters` is a valid char boundary.
    let rest = &s[letters..];
    let digits = rest.strip_prefix('-').unwrap_or(rest);
    !digits.is_empty() && digits.bytes().all(|b| b.is_ascii_digit())
}

/// Finding-anchor shape: starts with an ASCII lowercase letter, contains only lowercase
/// letters, digits and `-`, and ends in a digit (e.g. `r9`, `t6`, `am-3`).
fn is_finding_shape(s: &str) -> bool {
    let bytes = s.as_bytes();
    let (Some(first), Some(last)) = (bytes.first(), bytes.last()) else {
        return false;
    };
    first.is_ascii_lowercase()
        && last.is_ascii_digit()
        && bytes
            .iter()
            .all(|b| matches!(*b, b'a'..=b'z' | b'0'..=b'9' | b'-'))
}

/// True when a link destination stays inside the corpus (a relative path or `#anchor`)
/// rather than pointing out to the web.
///
/// A destination is treated as external when it is protocol-relative (`//host/…`), contains
/// `://` anywhere, or begins with a `scheme:` prefix such as `mailto:` or `tel:`.
fn is_local_link(dest: &str) -> bool {
    let target = dest.trim();
    if target.starts_with("//") || target.contains("://") {
        return false;
    }
    let Some((prefix, _)) = target.split_once(':') else {
        return true;
    };
    // A scheme is a letter followed by letters, digits, `+`, `-` or `.`; any other text
    // before the first colon (e.g. a path containing `/` or `#`) leaves the link local.
    let mut chars = prefix.chars();
    let scheme_like = matches!(chars.next(), Some(c) if c.is_ascii_alphabetic())
        && chars.all(|c| c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.'));
    !scheme_like
}

/// Remove duplicate strings from `v` in place, keeping the first occurrence of each and the
/// original relative order.
fn dedup_preserve(v: &mut Vec<String>) {
    let mut first_seen: BTreeSet<String> = BTreeSet::new();
    v.retain(|item| {
        if first_seen.contains(item) {
            false
        } else {
            first_seen.insert(item.clone());
            true
        }
    });
}

#[cfg(test)]
mod tests {
    use super::*;

    // Fixture: an accepted decision `D2` with structured edges to `A1` and `D1`. The body
    // mentions `F5` and `D7` in prose, `D9` only inside a fenced code block, and `A1` again
    // (already a structured edge), plus one section ref `§9.6`.
    const DECISION: &str = r#"+++
id = "D2"
kind = "decision"
lifecycle = "current"
authority = "normative"
status = "accepted"
date = 2026-06-18
last_reviewed = 2026-06-20
depends_on = ["A1"]
supersedes = ["D1"]
+++

This supersedes the old approach; see §9.6 and the open fork F5.

```text
let y = D9;   // inside a code block — must be ignored
```

It also discusses D7 in prose, and A1 (already a structured edge).
"#;

    /// Parse `text` with [`TomlFormat`] under the fixed dummy path `x.md`.
    fn parse(text: &str) -> Result<Record, ParseError> {
        TomlFormat.parse(&DocPath("x.md".into()), text)
    }

    // The header fields of the `DECISION` fixture land in the matching `Record` fields and
    // the facet is a decision with the declared status.
    #[test]
    fn parses_decision_header() {
        let r = parse(DECISION).expect("should parse");
        assert_eq!(r.id, Some(Id("D2".into())));
        assert_eq!(r.kind, Kind::Decision);
        assert_eq!(r.lifecycle, Lifecycle::Current);
        assert_eq!(r.last_reviewed, Date("2026-06-20".into()));
        assert_eq!(r.edges.depends_on, vec![Id("A1".into())]);
        assert_eq!(r.edges.supersedes, vec![Id("D1".into())]);
        match &r.facet {
            Facet::Decision(d) => assert_eq!(d.status, Status::Accepted),
            other => panic!("expected decision facet, got {other:?}"),
        }
    }

    // Bare mentions include prose ids only: ids inside code blocks, the record's own id,
    // and ids already present as structured edges are all left out.
    #[test]
    fn body_extraction_excludes_code_and_structured_edges() {
        let r = parse(DECISION).unwrap();
        let mentions: Vec<&str> = r.body.bare_mentions.iter().map(|i| i.0.as_str()).collect();
        assert!(mentions.contains(&"F5"), "{mentions:?}");
        assert!(mentions.contains(&"D7"), "{mentions:?}");
        assert!(!mentions.contains(&"D9"), "code-block id leaked: {mentions:?}");
        assert!(!mentions.contains(&"A1"), "structured edge counted as bare: {mentions:?}");
        assert!(!mentions.contains(&"D2"), "own id counted as bare: {mentions:?}");
        assert!(!mentions.contains(&"D1"), "supersedes target counted as bare: {mentions:?}");
        assert_eq!(r.body.section_refs, vec!["9.6".to_string()]);
    }

    // A document with no `+++` fence is rejected.
    #[test]
    fn missing_front_matter_errors() {
        assert!(parse("no front matter here\n").is_err());
    }

    // An open decision without `lean` / `decide_when` / `door` is an error; with all three
    // present it parses and carries a fork holding those values.
    #[test]
    fn open_decision_requires_fork_fields() {
        let missing = r#"+++
id = "F5"
kind = "decision"
lifecycle = "draft"
authority = "normative"
status = "open"
date = 2026-06-18
last_reviewed = 2026-06-20
+++
body
"#;
        let err = parse(missing).unwrap_err();
        assert!(err.0.contains("open decision requires"), "{}", err.0);

        let ok = r#"+++
id = "F5"
kind = "decision"
lifecycle = "draft"
authority = "normative"
status = "open"
date = 2026-06-18
last_reviewed = 2026-06-20
lean = "jinn-native symbolic"
decide_when = "first obligation linear-arith can't discharge"
door = "reversible"
+++
body
"#;
        let r = parse(ok).unwrap();
        match &r.facet {
            Facet::Decision(d) => {
                let fork = d.fork.as_ref().expect("open decision should carry a fork");
                assert_eq!(fork.door, Door::Reversible);
                assert_eq!(fork.lean, "jinn-native symbolic");
            }
            other => panic!("expected decision, got {other:?}"),
        }
    }

    // Only anchors whose id has the finding shape (here: ends in a digit) become findings.
    #[test]
    fn finding_anchors_extracted() {
        let doc = r#"+++
id = "RL1"
kind = "review-log"
lifecycle = "current"
authority = "historical"
last_reviewed = 2026-06-20
+++

<a id="r9"></a> A finding about something.
<a id="not-a-finding"></a> ignored (no trailing digit).
"#;
        let r = parse(doc).unwrap();
        let ids: Vec<&str> = r.body.findings.iter().map(|f| f.id.0.as_str()).collect();
        assert_eq!(ids, vec!["r9"]);
    }

    // A header without `id` is rejected with a "missing `id`" message.
    #[test]
    fn missing_id_errors() {
        let doc = r#"+++
kind = "decision"
lifecycle = "current"
authority = "normative"
status = "accepted"
date = 2026-06-18
last_reviewed = 2026-06-20
+++
body
"#;
        assert!(parse(doc).unwrap_err().0.contains("missing `id`"));
    }

    // Ids inside an external URL are not link refs; ids inside a local link destination are.
    #[test]
    fn external_link_is_not_a_local_citation() {
        let doc = r#"+++
id = "A1"
kind = "architecture"
lifecycle = "current"
authority = "normative"
last_reviewed = 2026-06-20
+++

See [an unrelated page](https://example.com/D1) and [the local one](./other.md#D2).
"#;
        let r = parse(doc).unwrap();
        let refs: Vec<&str> = r.body.link_refs.iter().map(|i| i.0.as_str()).collect();
        assert_eq!(refs, vec!["D2"], "external D1 must not become a citation");
    }

    /// Build a complete, valid accepted-decision document and splice `field` in as one
    /// extra header line (pass `""` for none). Used by the type-strictness tests, which
    /// either add a field here or `.replace` one of the fixed lines afterwards.
    fn decision_with(field: &str) -> String {
        format!(
            "+++\n\
             id = \"D2\"\n\
             kind = \"decision\"\n\
             lifecycle = \"current\"\n\
             authority = \"normative\"\n\
             status = \"accepted\"\n\
             date = 2026-06-18\n\
             last_reviewed = 2026-06-20\n\
             {field}\n\
             +++\nbody\n"
        )
    }

    // A string where an array of ids is expected is a type error, not an empty list.
    #[test]
    fn bare_string_for_array_field_errors() {
        // `supersedes = "D1"` must not silently collapse to an empty edge list.
        let err = parse(&decision_with(r#"supersedes = "D1""#)).unwrap_err();
        assert!(err.0.contains("`supersedes` must be an array"), "{}", err.0);
    }

    // A non-string element inside an id array is reported with its index.
    #[test]
    fn mixed_array_element_errors() {
        let err = parse(&decision_with(r#"depends_on = ["A1", 2]"#)).unwrap_err();
        assert!(err.0.contains("depends_on[1]"), "{}", err.0);
    }

    // An enum field holding a non-string value reports a type error.
    #[test]
    fn wrong_typed_enum_errors() {
        // A non-string status is an error, not "absent".
        let doc = parse(&decision_with("").replace(r#"status = "accepted""#, "status = 5"));
        assert!(doc.unwrap_err().0.contains("`status` must be a string"));
    }

    // `date` accepts a bare TOML date or a quoted `YYYY-MM-DD` string, and nothing else.
    #[test]
    fn date_must_be_iso_yyyy_mm_dd() {
        // A full TOML date-time is rejected.
        let ts = decision_with("").replace("date = 2026-06-18", "date = 2026-06-18T10:00:00Z");
        assert!(parse(&ts).unwrap_err().0.contains("date"), "timestamp should be rejected");
        // A non-ISO date string is rejected.
        let bad = decision_with("").replace("date = 2026-06-18", r#"date = "June 18, 2026""#);
        assert!(parse(&bad).unwrap_err().0.contains("date"), "free-form date should be rejected");
        // A quoted ISO date string is accepted.
        let ok = decision_with("").replace("date = 2026-06-18", r#"date = "2026-06-18""#);
        assert!(parse(&ok).is_ok());
    }

    // A sentence-ending dot after a section number is not part of the section ref.
    #[test]
    fn section_ref_drops_trailing_dot() {
        let doc = r#"+++
id = "EX1"
kind = "explainer"
lifecycle = "current"
authority = "explanatory"
last_reviewed = 2026-06-20
+++

As discussed in §9.6. and also §2, the design holds.
"#;
        let r = parse(doc).unwrap();
        assert_eq!(r.body.section_refs, vec!["9.6".to_string(), "2".to_string()]);
    }

    // Findings come from `<a id=…>` only: other tags and `data-id` attributes are ignored
    // even when their value has the finding shape.
    #[test]
    fn findings_only_from_anchor_tags() {
        let doc = r#"+++
id = "RL2"
kind = "review-log"
lifecycle = "current"
authority = "historical"
last_reviewed = 2026-06-20
+++

<a id="r9"></a> a real finding.
<div id="d9"></div> a div, not an anchor.
<a data-id="x9"></a> data-id, not the id attribute.
<article id="t3"></article> not an <a> tag.
"#;
        let r = parse(doc).unwrap();
        let ids: Vec<&str> = r.body.findings.iter().map(|f| f.id.0.as_str()).collect();
        assert_eq!(ids, vec!["r9"]);
    }

    // Quoted dates are checked against the calendar, not just the `YYYY-MM-DD` shape:
    // impossible months/days fail, a real leap day passes.
    #[test]
    fn quoted_date_calendar_validated() {
        for bad in ["2026-99-99", "2026-13-01", "2026-00-10", "2026-02-30", "2026-04-31"] {
            let doc = decision_with("").replace("date = 2026-06-18", &format!(r#"date = "{bad}""#));
            assert!(parse(&doc).is_err(), "{bad} should be rejected");
        }
        let leap = decision_with("").replace("date = 2026-06-18", r#"date = "2024-02-29""#);
        assert!(parse(&leap).is_ok(), "valid leap day should parse");
    }

    // An anchor inside a single-line `<!-- … -->` comment is not reported as a finding.
    #[test]
    fn commented_out_anchors_are_not_findings() {
        let doc = r#"+++
id = "RL3"
kind = "review-log"
lifecycle = "current"
authority = "historical"
last_reviewed = 2026-06-20
+++

<!-- <a id="r9"></a> this finding is commented out -->
<a id="t3"></a> a real one.
"#;
        let r = parse(doc).unwrap();
        let ids: Vec<&str> = r.body.findings.iter().map(|f| f.id.0.as_str()).collect();
        assert_eq!(ids, vec!["t3"]);
    }
}