Skip to main content

lex_analysis/
diagnostics.rs

1use crate::inline::extract_references;
2use lex_config::{DiagnosticsRulesConfig, RuleConfig, Severity};
3use lex_core::lex::ast::{
4    Annotation, ContentItem, Document, Range, Session, Table, TableRow, TextContent,
5};
6use lex_core::lex::inlines::ReferenceType;
7use lex_extension_host::Registry;
8use std::borrow::Cow;
9use std::collections::HashSet;
10
/// The category of an analyser diagnostic.
///
/// Each variant maps to a stable on-the-wire code via
/// [`DiagnosticKind::code`]; that code is what `[diagnostics.rules]`
/// entries are matched against in [`apply_rules`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DiagnosticKind {
    /// A numbered footnote reference whose number is not among the
    /// footnote definitions in scope (document/session-level definitions,
    /// plus the enclosing table's own footnote list for references inside
    /// a table). Emitted by `check_text`.
    MissingFootnoteDefinition,
    /// A footnote definition that nothing references.
    ///
    /// NOTE(review): no check in the visible part of this file emits this
    /// variant — confirm where (or whether) it is produced.
    UnusedFootnoteDefinition,
    /// A table row whose effective column count (after colspan and rowspan
    /// carry-over) differs from the first row's. Emitted by
    /// `check_table_columns`.
    TableInconsistentColumns,
    /// A label invocation failed schema pre-validation before the
    /// handler was dispatched. The variant carries which of the
    /// pre-validation checks tripped.
    SchemaValidation(SchemaValidationKind),
    /// A diagnostic emitted by a registered extension handler. The
    /// `namespace` field is the namespace name (the part before the
    /// first `.`, e.g., `"acme"` for label `"acme.task"`) — `lex-lsp`
    /// surfaces it as the diagnostic `source: "lex:<namespace>"` so
    /// editors can filter by extension.
    ///
    /// `code` carries the **bare leaf** the handler supplied (the
    /// `code` field on `lex_extension::Diagnostic`), *not* the wire
    /// form. The analyser glues on the namespace prefix in
    /// [`DiagnosticKind::code`] to produce the wire shape per spec §9
    /// (`<namespace>.<leaf>`, e.g. `"acme.foo"`; or the per-namespace
    /// fallback `"acme.diagnostic"` when the handler set `None`).
    /// Passing an already-prefixed value here would produce a
    /// double-prefixed wire code (`"acme.acme.foo"`) — handlers should
    /// supply just the leaf.
    Handler {
        namespace: String,
        code: Option<String>,
    },
    /// A label uses the reserved `doc.*` prefix (forbidden under
    /// `comms/specs/general.lex` §4.1). PR 4 of #584 emits this when
    /// permissive-mode parse lets the label flow through; the LSP
    /// then offers a quickfix to rewrite to the blessed shortcut
    /// (`doc.table` → `table`, `doc.image` → `image`, etc.).
    ForbiddenLabelPrefix,
    /// A `lex.*` literal that doesn't match any registered canonical
    /// in [`lex_core::lex::builtins::CANONICAL_LABELS`]. Typically a
    /// typo (`lex.fooar`) or a label authored against a future
    /// version of the core schemas.
    UnknownLexCanonical,
    /// A paragraph line that looks like an annotation header (`:: label`)
    /// but has no closing `::`. There is no "open form" — such a line is
    /// kept as paragraph text rather than dropped (lex#700) — so this
    /// warns the author that what looks like metadata is being treated as
    /// content. The fix is to close the marker: `:: label ::`.
    UnclosedAnnotation,
}
57
/// Severity for analysis-emitted diagnostics. The analyser populates
/// it for every diagnostic — `lex-lsp` reads `diag.severity`
/// directly when mapping onto the LSP wire. (Earlier the LSP layer
/// derived severity from `DiagnosticKind`; that mapping moved
/// upstream once the extension-emitted diagnostics needed
/// per-instance severities.)
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiagnosticSeverity {
    /// Used by the built-in footnote and label checks, and the level a
    /// `deny` rule upgrades a diagnostic to in [`apply_rules`].
    Error,
    /// Used by the built-in table-column and unclosed-annotation checks.
    Warning,
    /// Not produced by any built-in check visible in this file.
    Info,
    /// Not produced by any built-in check visible in this file.
    Hint,
}
71
/// One of the schema pre-validation checks the analyser owns before
/// dispatching to a handler. Wire spec / proposal §13.2.
///
/// The checks themselves are implemented outside this part of the file;
/// the per-variant notes below on the undocumented variants are inferred
/// from the variant names and wire codes only.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SchemaValidationKind {
    /// The namespace is registered but the schema set for that
    /// namespace doesn't declare this exact label. The walker emits
    /// this when `Registry::schema_for(label)` returns `None` while
    /// `is_namespace_healthy(<ns prefix>)` is `true`. Distinguishes
    /// "typo / out-of-version label" (this variant, surfaced as a
    /// document error) from "unknown namespace" (silent pass-through
    /// per the bounded-extensibility rule).
    UnknownLabel,
    /// Wire code `schema.missing-param`.
    /// NOTE(review): presumably a parameter the schema requires is absent — confirm.
    MissingParam,
    /// Wire code `schema.param-type-mismatch`.
    /// NOTE(review): presumably a parameter value has the wrong type — confirm.
    ParamTypeMismatch,
    /// Wire code `schema.bad-attachment`.
    /// NOTE(review): presumably the label is attached to an element the schema
    /// does not allow — confirm.
    BadAttachment,
    /// Wire code `schema.body-shape-mismatch`.
    /// NOTE(review): presumably the body does not have the shape the schema
    /// declares — confirm.
    BodyShapeMismatch,
}
89
90impl SchemaValidationKind {
91    /// The on-the-wire code for this schema-validation kind. Matches
92    /// the `[diagnostics.rules.schema]` field name in `.lex.toml`.
93    pub fn code(&self) -> &'static str {
94        match self {
95            SchemaValidationKind::UnknownLabel => "schema.unknown-label",
96            SchemaValidationKind::MissingParam => "schema.missing-param",
97            SchemaValidationKind::ParamTypeMismatch => "schema.param-type-mismatch",
98            SchemaValidationKind::BadAttachment => "schema.bad-attachment",
99            SchemaValidationKind::BodyShapeMismatch => "schema.body-shape-mismatch",
100        }
101    }
102}
103
104impl DiagnosticKind {
105    /// The on-the-wire code for this diagnostic kind. The same value
106    /// travels in `lsp_types::Diagnostic.code` and is the key the
107    /// `[diagnostics.rules]` block in `.lex.toml` matches against
108    /// (see [`DiagnosticsRulesConfig::lookup_by_code`]).
109    ///
110    /// For the `Handler` variant — extension-emitted diagnostics —
111    /// this returns the namespace-prefixed code: `"acme.foo"` for
112    /// `Handler { namespace: "acme", code: Some("foo") }`, or
113    /// `"acme.diagnostic"` when the handler omitted a code. The
114    /// namespace prefix is what `[diagnostics.rules]` keys match
115    /// against (spec §9), and the per-namespace `.diagnostic` fallback
116    /// gives users one knob per namespace for code-less handler
117    /// diagnostics rather than a single global `"handler.diagnostic"`.
118    ///
119    /// Returns `Cow<'static, str>` so built-in variants borrow a
120    /// static string (no allocation) while the `Handler` variant owns
121    /// the `format!`-produced result. `apply_rules` runs on every
122    /// document change in the LSP, so avoiding per-built-in allocations
123    /// matters.
124    pub fn code(&self) -> Cow<'static, str> {
125        match self {
126            DiagnosticKind::MissingFootnoteDefinition => "missing-footnote".into(),
127            DiagnosticKind::UnusedFootnoteDefinition => "unused-footnote".into(),
128            DiagnosticKind::TableInconsistentColumns => "table-inconsistent-columns".into(),
129            DiagnosticKind::SchemaValidation(kind) => kind.code().into(),
130            DiagnosticKind::Handler { namespace, code } => match code {
131                Some(c) => format!("{namespace}.{c}").into(),
132                None => format!("{namespace}.diagnostic").into(),
133            },
134            DiagnosticKind::ForbiddenLabelPrefix => "forbidden-label-prefix".into(),
135            DiagnosticKind::UnknownLexCanonical => "unknown-lex-canonical".into(),
136            DiagnosticKind::UnclosedAnnotation => "unclosed-annotation".into(),
137        }
138    }
139}
140
141/// Apply a `[diagnostics.rules]` configuration to a stream of analyser
142/// diagnostics in place. Drops diagnostics whose resolved severity is
143/// `allow`, and remaps the remaining diagnostics' `severity` field:
144///
145/// - `warn` → the diagnostic's intrinsic severity stays unchanged.
146/// - `deny` → severity is upgraded to `Error`.
147///
148/// `lookup_rule` is the resolution function — typically
149/// [`LoadedLexConfig::lookup_diagnostic_rule`](lex_config::LoadedLexConfig::lookup_diagnostic_rule),
150/// which consults the named built-in fields first and the
151/// extension-rules side-channel second. Diagnostics whose code has no
152/// matching entry on either surface pass through untouched at their
153/// intrinsic severity.
154pub fn apply_rules<F>(diagnostics: &mut Vec<AnalysisDiagnostic>, lookup_rule: F)
155where
156    F: Fn(&str) -> Option<RuleConfig>,
157{
158    diagnostics.retain_mut(|diag| {
159        let code = diag.kind.code();
160        let Some(rule) = lookup_rule(&code) else {
161            return true;
162        };
163        match rule.severity() {
164            Severity::Allow => false,
165            Severity::Warn => true,
166            Severity::Deny => {
167                diag.severity = DiagnosticSeverity::Error;
168                true
169            }
170        }
171    });
172}
173
/// A single finding produced by the analyser.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AnalysisDiagnostic {
    /// Source range the finding is attached to. The built-in checks use
    /// the location of the offending text line, label, footnote reference
    /// or table row.
    pub range: Range,
    /// Severity, set by the analyser for every diagnostic it
    /// produces. `lex-lsp` reads this directly when mapping onto LSP
    /// wire severities; the kind-to-severity mapping that lived in
    /// `to_lsp_diagnostic` is no longer authoritative.
    pub severity: DiagnosticSeverity,
    /// Category of the finding; its wire code comes from
    /// [`DiagnosticKind::code`].
    pub kind: DiagnosticKind,
    /// Human-readable description of the problem.
    pub message: String,
}
185
186/// Run the analyser without an extension registry — equivalent to
187/// running with an empty registry. Provided for callers that haven't
188/// adopted the extension system yet.
189pub fn analyze(document: &Document) -> Vec<AnalysisDiagnostic> {
190    let registry = Registry::new();
191    analyze_with_registry(document, &registry)
192}
193
194/// Run the analyser with a populated extension registry. Labels whose
195/// namespace is registered get pre-validated against their schema and,
196/// if pre-validation passes, dispatched to the handler's `on_validate`
197/// hook. Handler-emitted diagnostics are merged into the same stream as
198/// the built-in checks.
199pub fn analyze_with_registry(document: &Document, registry: &Registry) -> Vec<AnalysisDiagnostic> {
200    let mut diagnostics = Vec::new();
201    check_footnotes(document, &mut diagnostics);
202    check_tables(document, &mut diagnostics);
203    check_labels(document, &mut diagnostics);
204    check_unclosed_annotations(document, &mut diagnostics);
205    crate::label_dispatch::dispatch_labels(document, registry, &mut diagnostics);
206    diagnostics
207}
208
209/// Warn on paragraph lines that look like an annotation header but never close
210/// the `:: ::` marker (lex#700). There is no "open form": `:: label` with no
211/// closing `::` is not a recognized element, so the parser keeps it as paragraph
212/// text rather than dropping it. This surfaces that — the author likely meant an
213/// annotation and forgot the trailing `::`.
214fn check_unclosed_annotations(document: &Document, diagnostics: &mut Vec<AnalysisDiagnostic>) {
215    fn emit(
216        tl: &lex_core::lex::ast::elements::paragraph::TextLine,
217        out: &mut Vec<AnalysisDiagnostic>,
218    ) {
219        if looks_like_unclosed_annotation(tl.text()) {
220            out.push(AnalysisDiagnostic {
221                range: tl.location.clone(),
222                severity: DiagnosticSeverity::Warning,
223                kind: DiagnosticKind::UnclosedAnnotation,
224                message: "this line looks like an annotation but has no closing `::`, \
225                          so it is treated as text. Close the marker to make it an \
226                          annotation, e.g. `:: label ::`."
227                    .to_string(),
228            });
229        }
230    }
231
232    fn walk(item: &ContentItem, out: &mut Vec<AnalysisDiagnostic>) {
233        if let ContentItem::Paragraph(p) = item {
234            for line in &p.lines {
235                if let ContentItem::TextLine(tl) = line {
236                    emit(tl, out);
237                }
238            }
239        }
240        if let Some(children) = item.children() {
241            for child in children {
242                walk(child, out);
243            }
244        }
245    }
246
247    for child in &document.root.children {
248        walk(child, diagnostics);
249    }
250}
251
/// Heuristic: does `text` look like an annotation header (`:: label …`)
/// that was never closed with a second `::`?
///
/// This is deliberately a lightweight text test. By the time content
/// reaches the analyser a *closed* annotation has already become its own
/// node, so a paragraph line that leads with `::` is the unclosed shape.
///
/// A line matches when, after trimming surrounding whitespace, it:
/// 1. starts with `::`,
/// 2. has whitespace right after that marker, followed by a token that
///    begins with a letter, and
/// 3. contains no further `::` outside double quotes.
fn looks_like_unclosed_annotation(text: &str) -> bool {
    let Some(after_marker) = text.trim().strip_prefix("::") else {
        return false;
    };

    // Shape check: a gap after the opening marker, then a label that starts
    // with a letter (label.lex: a letter, then letters/digits/`_`/`-`/`.`).
    let label = after_marker.trim_start();
    let has_gap = label.len() < after_marker.len();
    let starts_with_letter = label.chars().next().is_some_and(char::is_alphabetic);
    if !(has_gap && starts_with_letter) {
        return false;
    }

    // A second *structural* `::` means the marker is closed. The scan is
    // quote-aware so a `::` inside a quoted parameter value (e.g.
    // `:: note foo=":: value"`) is not mistaken for a close, matching how
    // the lexer's structural-marker detection treats it.
    let mut quoted = false;
    let mut prev_structural_colon = false;
    for ch in after_marker.chars() {
        let structural_colon = ch == ':' && !quoted;
        if structural_colon && prev_structural_colon {
            return false;
        }
        if ch == '"' {
            quoted = !quoted;
        }
        prev_structural_colon = structural_colon;
    }
    true
}
278
279/// Run the analyser with both an extension registry and a
280/// `[diagnostics.rules]` configuration. The configuration is applied
281/// after all checks run, so rule overrides ([`Severity::Allow`] /
282/// [`Severity::Deny`]) take effect uniformly across the diagnostic
283/// stream.
284pub fn analyze_with_rules(
285    document: &Document,
286    registry: &Registry,
287    rules: &DiagnosticsRulesConfig,
288) -> Vec<AnalysisDiagnostic> {
289    let mut diagnostics = analyze_with_registry(document, registry);
290    apply_rules(&mut diagnostics, |code| rules.lookup_by_code(code).cloned());
291    diagnostics
292}
293
294/// Walk every label site in the document and re-classify via
295/// [`classify_label`](lex_core::lex::assembling::stages::normalize_labels::classify_label).
296/// Emits diagnostics for sites that strict-mode parsing would have
297/// rejected — `doc.*` (forbidden) and unknown `lex.*` (not a
298/// registered canonical). The LSP-side permissive parse keeps the
299/// AST building so these surface as in-place diagnostics rather than
300/// as a wholesale parse failure.
301fn check_labels(document: &Document, diagnostics: &mut Vec<AnalysisDiagnostic>) {
302    use lex_core::lex::assembling::stages::normalize_labels::{
303        classify_label, RejectReason, Resolution,
304    };
305    use lex_core::lex::ast::Label;
306
307    fn emit(label: &Label, diagnostics: &mut Vec<AnalysisDiagnostic>) {
308        if let Resolution::Rejected(reason) = classify_label(&label.value) {
309            // Reuse the normative wording from `RejectReason::message()`
310            // so the strict-mode parser error and the permissive-mode
311            // analysis diagnostic stay literally identical — no chance
312            // of wording drift between the two surfaces.
313            let message = reason.message();
314            let kind = match reason {
315                RejectReason::Forbidden { .. } => DiagnosticKind::ForbiddenLabelPrefix,
316                RejectReason::UnknownCanonical { .. } => DiagnosticKind::UnknownLexCanonical,
317            };
318            diagnostics.push(AnalysisDiagnostic {
319                range: label.location.clone(),
320                severity: DiagnosticSeverity::Error,
321                kind,
322                message,
323            });
324        }
325    }
326
327    // Unified dispatch: every ContentItem flows through `walk_item`,
328    // which emits the type-specific label sites (annotation label,
329    // verbatim closer label, table cells/footnotes) exactly once and
330    // then defers to `attached_annotations` + `item.children()` for
331    // the uniform recursion. The earlier shape had type-specific
332    // walkers (`walk_annotation`, `walk_verbatim`, `walk_table`) that
333    // descended on their own and then `walk_item` descended again —
334    // duplicate-walk regression caught by Copilot's review on PR 589.
335    fn walk_item(item: &ContentItem, diagnostics: &mut Vec<AnalysisDiagnostic>) {
336        match item {
337            ContentItem::Annotation(a) => emit(&a.data.label, diagnostics),
338            ContentItem::VerbatimBlock(v) => emit(&v.closing_data.label, diagnostics),
339            ContentItem::Table(t) => {
340                for row in t.header_rows.iter().chain(t.body_rows.iter()) {
341                    for cell in &row.cells {
342                        for child in cell.children.iter() {
343                            walk_item(child, diagnostics);
344                        }
345                    }
346                }
347                if let Some(footnotes) = t.footnotes.as_ref() {
348                    for ann in footnotes.annotations() {
349                        walk_annotation(ann, diagnostics);
350                    }
351                    for fn_item in footnotes.items.iter() {
352                        walk_item(fn_item, diagnostics);
353                    }
354                }
355            }
356            _ => {}
357        }
358        // Attached annotations (sessions, paragraphs, lists, list
359        // items, verbatim blocks, tables — see `attached_annotations`).
360        if let Some(attached) = attached_annotations(item) {
361            for annotation in attached {
362                walk_annotation(annotation, diagnostics);
363            }
364        }
365        // Generic child descent. For ContentItem::Annotation,
366        // `item.children()` returns the annotation's body children, so
367        // type-specific walking of nested annotations is not needed.
368        if let Some(children) = item.children() {
369            for child in children {
370                walk_item(child, diagnostics);
371            }
372        }
373    }
374
375    fn walk_annotation(annotation: &Annotation, diagnostics: &mut Vec<AnalysisDiagnostic>) {
376        emit(&annotation.data.label, diagnostics);
377        for child in annotation.children.iter() {
378            walk_item(child, diagnostics);
379        }
380    }
381
382    fn walk_session(session: &Session, diagnostics: &mut Vec<AnalysisDiagnostic>) {
383        for annotation in session.annotations() {
384            walk_annotation(annotation, diagnostics);
385        }
386        for child in &session.children {
387            walk_item(child, diagnostics);
388        }
389    }
390
391    fn attached_annotations(item: &ContentItem) -> Option<&[Annotation]> {
392        match item {
393            ContentItem::Session(s) => Some(s.annotations()),
394            ContentItem::Paragraph(p) => Some(p.annotations()),
395            ContentItem::Definition(d) => Some(d.annotations()),
396            ContentItem::List(l) => Some(l.annotations()),
397            ContentItem::ListItem(li) => Some(li.annotations()),
398            ContentItem::VerbatimBlock(v) => Some(v.annotations()),
399            ContentItem::Table(t) => Some(t.annotations()),
400            _ => None,
401        }
402    }
403
404    // Document-level annotations.
405    for annotation in document.annotations() {
406        walk_annotation(annotation, diagnostics);
407    }
408    // Root session walks.
409    walk_session(&document.root, diagnostics);
410}
411
412fn check_footnotes(document: &Document, diagnostics: &mut Vec<AnalysisDiagnostic>) {
413    // Numbered definitions reachable from outside any table: :: notes ::
414    // annotated lists at document or session scope.
415    let outer_defs: HashSet<u32> = crate::utils::collect_footnote_definitions(document)
416        .into_iter()
417        .filter_map(|(label, _)| label.parse::<u32>().ok())
418        .collect();
419
420    // References outside tables resolve to `outer_defs`; references inside a
421    // table resolve first to that table's own positional footnote list
422    // (`table.footnotes`) and then fall back to `outer_defs`.
423    if let Some(title) = &document.title {
424        check_text(&title.content, &outer_defs, diagnostics);
425    }
426    for annotation in document.annotations() {
427        check_annotation(annotation, &outer_defs, diagnostics);
428    }
429    check_session(&document.root, &outer_defs, diagnostics);
430}
431
432fn check_session(
433    session: &Session,
434    defs: &HashSet<u32>,
435    diagnostics: &mut Vec<AnalysisDiagnostic>,
436) {
437    check_text(&session.title, defs, diagnostics);
438    for annotation in session.annotations() {
439        check_annotation(annotation, defs, diagnostics);
440    }
441    for child in session.children.iter() {
442        check_content(child, defs, diagnostics);
443    }
444}
445
446fn check_content(
447    item: &ContentItem,
448    defs: &HashSet<u32>,
449    diagnostics: &mut Vec<AnalysisDiagnostic>,
450) {
451    match item {
452        ContentItem::Paragraph(p) => {
453            for line in &p.lines {
454                if let ContentItem::TextLine(tl) = line {
455                    check_text(&tl.content, defs, diagnostics);
456                }
457            }
458            for annotation in p.annotations() {
459                check_annotation(annotation, defs, diagnostics);
460            }
461        }
462        ContentItem::Session(s) => check_session(s, defs, diagnostics),
463        ContentItem::List(list) => {
464            for annotation in list.annotations() {
465                check_annotation(annotation, defs, diagnostics);
466            }
467            for entry in &list.items {
468                if let ContentItem::ListItem(li) = entry {
469                    for text in &li.text {
470                        check_text(text, defs, diagnostics);
471                    }
472                    for annotation in li.annotations() {
473                        check_annotation(annotation, defs, diagnostics);
474                    }
475                    for child in li.children.iter() {
476                        check_content(child, defs, diagnostics);
477                    }
478                }
479            }
480        }
481        ContentItem::Definition(def) => {
482            check_text(&def.subject, defs, diagnostics);
483            for annotation in def.annotations() {
484                check_annotation(annotation, defs, diagnostics);
485            }
486            for child in def.children.iter() {
487                check_content(child, defs, diagnostics);
488            }
489        }
490        ContentItem::Annotation(a) => check_annotation(a, defs, diagnostics),
491        ContentItem::VerbatimBlock(v) => {
492            check_text(&v.subject, defs, diagnostics);
493            for annotation in v.annotations() {
494                check_annotation(annotation, defs, diagnostics);
495            }
496        }
497        ContentItem::Table(table) => check_table(table, defs, diagnostics),
498        _ => {}
499    }
500}
501
502fn check_annotation(
503    annotation: &Annotation,
504    defs: &HashSet<u32>,
505    diagnostics: &mut Vec<AnalysisDiagnostic>,
506) {
507    for child in annotation.children.iter() {
508        check_content(child, defs, diagnostics);
509    }
510}
511
512fn check_table(
513    table: &Table,
514    outer_defs: &HashSet<u32>,
515    diagnostics: &mut Vec<AnalysisDiagnostic>,
516) {
517    // Extend the in-scope definitions with the table's positional footnote
518    // list. The table's own numbered items shadow nothing — they just add
519    // table-local numbers that references inside this table may resolve to.
520    // Fast path: most tables have no footnotes, so reuse `outer_defs` rather
521    // than cloning it into a new `HashSet` for every such table.
522    let table_defs = table_footnote_numbers(table);
523    if table_defs.is_empty() {
524        check_table_text(table, outer_defs, diagnostics);
525        return;
526    }
527    let mut scope = outer_defs.clone();
528    scope.extend(table_defs);
529    check_table_text(table, &scope, diagnostics);
530}
531
532fn check_table_text(table: &Table, defs: &HashSet<u32>, diagnostics: &mut Vec<AnalysisDiagnostic>) {
533    check_text(&table.subject, defs, diagnostics);
534    for row in table.all_rows() {
535        for cell in &row.cells {
536            check_text(&cell.content, defs, diagnostics);
537        }
538    }
539    for annotation in table.annotations() {
540        check_annotation(annotation, defs, diagnostics);
541    }
542}
543
544fn table_footnote_numbers(table: &Table) -> HashSet<u32> {
545    let Some(list) = &table.footnotes else {
546        return HashSet::new();
547    };
548    let mut numbers = HashSet::new();
549    for entry in &list.items {
550        if let ContentItem::ListItem(li) = entry {
551            let label = li
552                .marker()
553                .trim()
554                .trim_end_matches(['.', ')', ':'].as_ref())
555                .trim();
556            if let Ok(n) = label.parse::<u32>() {
557                numbers.insert(n);
558            }
559        }
560    }
561    numbers
562}
563
564fn check_text(text: &TextContent, defs: &HashSet<u32>, diagnostics: &mut Vec<AnalysisDiagnostic>) {
565    for reference in extract_references(text) {
566        if let ReferenceType::FootnoteNumber { number } = reference.reference_type {
567            if !defs.contains(&number) {
568                diagnostics.push(AnalysisDiagnostic {
569                    range: reference.range,
570                    severity: DiagnosticSeverity::Error,
571                    kind: DiagnosticKind::MissingFootnoteDefinition,
572                    message: format!(
573                        "Footnote [{number}] has no matching footnote definition in scope"
574                    ),
575                });
576            }
577        }
578    }
579}
580
581fn check_tables(document: &Document, diagnostics: &mut Vec<AnalysisDiagnostic>) {
582    visit_tables_in_session(&document.root, diagnostics);
583}
584
585fn visit_tables_in_session(session: &Session, diagnostics: &mut Vec<AnalysisDiagnostic>) {
586    for child in session.children.iter() {
587        visit_tables_in_content(child, diagnostics);
588    }
589}
590
591fn visit_tables_in_content(item: &ContentItem, diagnostics: &mut Vec<AnalysisDiagnostic>) {
592    match item {
593        ContentItem::Table(table) => check_table_columns(table, diagnostics),
594        ContentItem::Session(session) => visit_tables_in_session(session, diagnostics),
595        ContentItem::Definition(def) => {
596            for child in def.children.iter() {
597                visit_tables_in_content(child, diagnostics);
598            }
599        }
600        ContentItem::List(list) => {
601            for entry in &list.items {
602                if let ContentItem::ListItem(li) = entry {
603                    for child in li.children.iter() {
604                        visit_tables_in_content(child, diagnostics);
605                    }
606                }
607            }
608        }
609        ContentItem::Annotation(ann) => {
610            for child in ann.children.iter() {
611                visit_tables_in_content(child, diagnostics);
612            }
613        }
614        _ => {}
615    }
616}
617
618/// Check that all rows in a table have the same effective column count.
619///
620/// The effective width of a row accounts for both colspans of its own cells
621/// and rowspan carry-over from cells in prior rows that extend into it.
622/// Rows with different effective widths indicate a structural error (missing
623/// or extra cells).
624fn check_table_columns(table: &Table, diagnostics: &mut Vec<AnalysisDiagnostic>) {
625    let rows: Vec<_> = table.all_rows().collect();
626    if rows.len() < 2 {
627        return;
628    }
629
630    let widths = compute_row_widths(&rows);
631    let expected = widths[0];
632    for (i, &width) in widths.iter().enumerate().skip(1) {
633        if width != expected {
634            diagnostics.push(AnalysisDiagnostic {
635                range: rows[i].location.clone(),
636                severity: DiagnosticSeverity::Warning,
637                kind: DiagnosticKind::TableInconsistentColumns,
638                message: format!(
639                    "Row has {width} columns, expected {expected} (matching first row)"
640                ),
641            });
642        }
643    }
644}
645
646/// Simulate the virtual table grid to compute each row's effective width.
647///
648/// `carry[col]` tracks how many more rows (including the current one) a cell
649/// placed in a prior row still occupies column `col`. Own cells skip columns
650/// where `carry[col] > 0` (those are held by a cell from above via rowspan).
651fn compute_row_widths(rows: &[&TableRow]) -> Vec<usize> {
652    let mut carry: Vec<usize> = Vec::new();
653    let mut widths = Vec::with_capacity(rows.len());
654
655    for row in rows {
656        let mut col = 0;
657        for cell in &row.cells {
658            while col < carry.len() && carry[col] > 0 {
659                col += 1;
660            }
661            let end = col + cell.colspan;
662            if end > carry.len() {
663                carry.resize(end, 0);
664            }
665            for slot in carry.iter_mut().take(end).skip(col) {
666                *slot = cell.rowspan;
667            }
668            col = end;
669        }
670
671        let width = carry
672            .iter()
673            .rposition(|&r| r > 0)
674            .map(|i| i + 1)
675            .unwrap_or(0);
676        widths.push(width);
677
678        // Columns at or beyond `width` are guaranteed 0 (that's how width is
679        // defined), so limit the decrement to the active range and drop the
680        // trailing zeros to keep `carry` proportional to the live grid.
681        for c in carry.iter_mut().take(width) {
682            if *c > 0 {
683                *c -= 1;
684            }
685        }
686        carry.truncate(width);
687    }
688
689    widths
690}
691
692#[cfg(test)]
693mod tests {
694    use super::*;
695    use lex_core::lex::parsing::parse_document_permissive;
696    use lex_core::lex::testing::lexplore::Lexplore;
697
698    fn unclosed_annotation_diags(source: &str) -> Vec<AnalysisDiagnostic> {
699        let doc = parse_document_permissive(source).expect("permissive parse");
700        analyze(&doc)
701            .into_iter()
702            .filter(|d| d.kind == DiagnosticKind::UnclosedAnnotation)
703            .collect()
704    }
705
706    #[test]
707    fn unclosed_annotation_warns_on_open_form() {
708        // `:: note severity=high` (no closing `::`) parses as a paragraph; the
709        // analyser flags it so the author knows it isn't an annotation (lex#700).
710        let diags = unclosed_annotation_diags("Open form:\n\t:: note severity=high\n");
711        assert_eq!(diags.len(), 1, "expected one unclosed-annotation warning");
712        assert_eq!(diags[0].severity, DiagnosticSeverity::Warning);
713        assert_eq!(diags[0].kind.code(), "unclosed-annotation");
714    }
715
716    #[test]
717    fn unclosed_annotation_silent_on_closed_form_and_prose() {
718        // A properly closed annotation is its own node, not a flagged paragraph.
719        assert!(unclosed_annotation_diags(":: note severity=high ::\n\nBody.\n").is_empty());
720        // Prose that merely mentions `::` is not flagged.
721        assert!(unclosed_annotation_diags("Use :: to start a marker.\n").is_empty());
722    }
723
724    #[test]
725    fn looks_like_unclosed_annotation_heuristic() {
726        assert!(looks_like_unclosed_annotation(":: note"));
727        assert!(looks_like_unclosed_annotation("    :: note severity=high"));
728        // A `::` inside a quoted value is not a structural close, so this is still
729        // an unclosed annotation (lex#704 review).
730        assert!(looks_like_unclosed_annotation(":: note foo=\":: value\""));
731        assert!(!looks_like_unclosed_annotation(":: note ::"));
732        assert!(!looks_like_unclosed_annotation(
733            ":: note foo=\":: value\" ::"
734        )); // real close
735        assert!(!looks_like_unclosed_annotation("::note")); // no whitespace after marker
736        assert!(!looks_like_unclosed_annotation("::")); // no label
737        assert!(!looks_like_unclosed_annotation("just prose"));
738    }
739
740    fn footnote_diags(doc: &Document) -> Vec<AnalysisDiagnostic> {
741        analyze(doc)
742            .into_iter()
743            .filter(|d| d.kind == DiagnosticKind::MissingFootnoteDefinition)
744            .collect()
745    }
746
747    fn label_diags(source: &str) -> Vec<AnalysisDiagnostic> {
748        let doc = parse_document_permissive(source).expect("permissive parse");
749        analyze(&doc)
750            .into_iter()
751            .filter(|d| {
752                matches!(
753                    d.kind,
754                    DiagnosticKind::ForbiddenLabelPrefix | DiagnosticKind::UnknownLexCanonical
755                )
756            })
757            .collect()
758    }
759
760    #[test]
761    fn check_labels_emits_for_doc_prefix() {
762        let diags = label_diags(":: doc.table :: x\n\nBody.\n");
763        assert_eq!(diags.len(), 1, "expected 1 forbidden-prefix diagnostic");
764        assert_eq!(diags[0].kind, DiagnosticKind::ForbiddenLabelPrefix);
765        assert_eq!(diags[0].severity, DiagnosticSeverity::Error);
766        assert!(
767            diags[0].message.contains("doc.table") && diags[0].message.contains("reserved"),
768            "message names the offending prefix; got: {}",
769            diags[0].message
770        );
771    }
772
773    #[test]
774    fn check_labels_emits_for_unknown_lex_canonical() {
775        let diags = label_diags(":: lex.foobar :: x\n\nBody.\n");
776        assert_eq!(diags.len(), 1, "expected 1 unknown-canonical diagnostic");
777        assert_eq!(diags[0].kind, DiagnosticKind::UnknownLexCanonical);
778        assert_eq!(diags[0].severity, DiagnosticSeverity::Error);
779        assert!(
780            diags[0].message.contains("lex.foobar"),
781            "message names the offending label; got: {}",
782            diags[0].message
783        );
784    }
785
786    #[test]
787    fn check_labels_silent_on_accepted_forms() {
788        // Shortcut, prefix-stripped, canonical, and community labels
789        // all accept silently — analysis only flags the two reject
790        // categories from `classify_label`.
791        let sources = [
792            ":: author :: Alice\n\nBody.\n",
793            ":: metadata.author :: Alice\n\nBody.\n",
794            ":: lex.metadata.author :: Alice\n\nBody.\n",
795            ":: acme.task :: x\n\nBody.\n",
796        ];
797        for src in sources {
798            let diags = label_diags(src);
799            assert!(
800                diags.is_empty(),
801                "no label diagnostics expected for {src:?}; got {diags:?}"
802            );
803        }
804    }
805
806    #[test]
807    fn check_labels_finds_verbatim_closer_violations() {
808        let diags =
809            label_diags("Table:\n    | a | b |\n    |---|---|\n    | 1 | 2 |\n:: doc.table ::\n");
810        assert_eq!(diags.len(), 1);
811        assert_eq!(diags[0].kind, DiagnosticKind::ForbiddenLabelPrefix);
812    }
813
814    #[test]
815    fn check_labels_emits_each_offending_site_exactly_once() {
816        // Regression for Copilot's PR 589 callout: the earlier
817        // walker shape descended into a node's children twice (once
818        // via the type-specific helper, once via the generic
819        // `walk_item` fallback), which produced duplicate
820        // diagnostics for any forbidden label nested inside another
821        // label-bearing site. Three nested + adjacent forbidden
822        // labels should produce exactly three diagnostics, not six.
823        let src = ":: doc.outer ::\n    :: doc.inner :: nested body\n\n:: doc.sibling :: x\n";
824        let diags = label_diags(src);
825        assert_eq!(
826            diags.len(),
827            3,
828            "exactly one diagnostic per offending site: {diags:?}"
829        );
830        for d in &diags {
831            assert_eq!(d.kind, DiagnosticKind::ForbiddenLabelPrefix);
832        }
833    }
834
835    #[test]
836    fn detects_missing_footnote_definition() {
837        let doc = Lexplore::footnotes(1).parse().unwrap();
838        let diags = analyze(&doc);
839        assert_eq!(diags.len(), 1);
840        assert_eq!(diags[0].kind, DiagnosticKind::MissingFootnoteDefinition);
841    }
842
843    #[test]
844    fn ignores_valid_footnote_with_notes_annotation() {
845        // :: notes :: annotated list at the document root provides the definitions
846        let doc = Lexplore::footnotes(2).parse().unwrap();
847        assert!(footnote_diags(&doc).is_empty());
848    }
849
850    #[test]
851    fn ignores_valid_list_footnote_in_session() {
852        // :: notes :: inside a session
853        let doc = Lexplore::footnotes(3).parse().unwrap();
854        assert!(footnote_diags(&doc).is_empty());
855    }
856
857    #[test]
858    fn list_without_notes_annotation_is_not_footnotes() {
859        // A "Notes" session without :: notes :: does NOT define footnotes
860        let doc = Lexplore::footnotes(4).parse().unwrap();
861        assert_eq!(footnote_diags(&doc).len(), 1);
862    }
863
864    fn table_diags(doc: &Document) -> Vec<AnalysisDiagnostic> {
865        analyze(doc)
866            .into_iter()
867            .filter(|d| d.kind == DiagnosticKind::TableInconsistentColumns)
868            .collect()
869    }
870
871    #[test]
872    fn detects_inconsistent_table_columns() {
873        // table-13: 3-col header, 2-col row, 3-col row — middle row is short.
874        let doc = Lexplore::table(13).parse().unwrap();
875        let diags = table_diags(&doc);
876        assert_eq!(diags.len(), 1);
877        assert!(diags[0].message.contains("2 columns"));
878        assert!(diags[0].message.contains("expected 3"));
879    }
880
881    #[test]
882    fn consistent_table_no_diagnostic() {
883        // table-01: minimal 2-column table, all rows consistent.
884        let doc = Lexplore::table(1).parse().unwrap();
885        assert!(table_diags(&doc).is_empty());
886    }
887
888    #[test]
889    fn table_with_rowspan_counts_carry_over() {
890        // table-17: rowspan via ^^ — effective widths remain consistent across rows.
891        let doc = Lexplore::table(17).parse().unwrap();
892        let diags = table_diags(&doc);
893        assert!(
894            diags.is_empty(),
895            "rowspan carry-over should not trigger inconsistent-columns, got: {diags:?}"
896        );
897    }
898
899    #[test]
900    fn table_with_colspan_and_rowspan_mixed() {
901        // table-18: combined >> colspan and ^^ rowspan; effective widths stay consistent.
902        let doc = Lexplore::table(18).parse().unwrap();
903        let diags = table_diags(&doc);
904        assert!(
905            diags.is_empty(),
906            "mixed colspan/rowspan should not trigger inconsistent-columns, got: {diags:?}"
907        );
908    }
909
910    #[test]
911    fn table_with_colspan_counts_effective_width() {
912        // table-04: colspan via >> contributes to effective width; all rows consistent.
913        let doc = Lexplore::table(4).parse().unwrap();
914        assert!(table_diags(&doc).is_empty());
915    }
916
917    #[test]
918    fn footnote_ref_in_table_cell_is_checked() {
919        // footnotes-09: table cell contains [1] but no footnote definition
920        // anywhere in scope — document, session, or table-local.
921        let doc = Lexplore::footnotes(9).parse().unwrap();
922        let diags = footnote_diags(&doc);
923        assert_eq!(diags.len(), 1);
924        assert!(diags[0].message.contains("[1]"));
925    }
926
927    #[test]
928    fn table_scoped_footnotes_resolve_cell_refs() {
929        // footnotes-11: cell refs [1] and [2] resolve to the table's own
930        // positional footnote list (no :: notes :: annotation needed).
931        let doc = Lexplore::footnotes(11).parse().unwrap();
932        let diags = footnote_diags(&doc);
933        assert!(
934            diags.is_empty(),
935            "table-scoped cell refs should resolve to table.footnotes, got: {diags:?}"
936        );
937    }
938
939    #[test]
940    fn table_scoped_footnotes_do_not_leak_out() {
941        // footnotes-12: a [1] ref in body text outside the table must NOT
942        // resolve to the table's own positional footnote list even when the
943        // numbers happen to match. The table's list is table-local.
944        let doc = Lexplore::footnotes(12).parse().unwrap();
945        let diags = footnote_diags(&doc);
946        assert_eq!(
947            diags.len(),
948            1,
949            "only the paragraph ref [1] should be unresolved, got: {diags:?}"
950        );
951        assert!(diags[0].message.contains("[1]"));
952    }
953
954    // ─────────────── apply_rules / DiagnosticKind::code ───────────────
955
956    fn dummy_diag(kind: DiagnosticKind, severity: DiagnosticSeverity) -> AnalysisDiagnostic {
957        AnalysisDiagnostic {
958            range: Range::default(),
959            severity,
960            kind,
961            message: "test".into(),
962        }
963    }
964
965    #[test]
966    fn diagnostic_kind_code_matches_lookup_for_every_builtin() {
967        // Drift test: every built-in DiagnosticKind variant must have a
968        // matching entry in DiagnosticsRulesConfig::lookup_by_code so
969        // configuration overrides reach every rule.
970        let rules = DiagnosticsRulesConfig::default();
971        for kind in [
972            DiagnosticKind::MissingFootnoteDefinition,
973            DiagnosticKind::UnusedFootnoteDefinition,
974            DiagnosticKind::TableInconsistentColumns,
975            DiagnosticKind::ForbiddenLabelPrefix,
976            DiagnosticKind::UnknownLexCanonical,
977            DiagnosticKind::SchemaValidation(SchemaValidationKind::UnknownLabel),
978            DiagnosticKind::SchemaValidation(SchemaValidationKind::MissingParam),
979            DiagnosticKind::SchemaValidation(SchemaValidationKind::ParamTypeMismatch),
980            DiagnosticKind::SchemaValidation(SchemaValidationKind::BadAttachment),
981            DiagnosticKind::SchemaValidation(SchemaValidationKind::BodyShapeMismatch),
982        ] {
983            let code = kind.code();
984            assert!(
985                rules.lookup_by_code(&code).is_some(),
986                "DiagnosticsRulesConfig is missing a field for built-in code {code:?} \
987                 — add it to lookup_by_code (and likely as a struct field too)"
988            );
989        }
990    }
991
992    #[test]
993    fn handler_code_carries_namespace_prefix() {
994        // Wire-shape contract (spec §9): the wire `code` is the
995        // namespace-prefixed form so a `.lex.toml` rule like
996        // `"acme.task-stuck" = "deny"` actually matches what the
997        // handler emitted. The handler supplies the bare leaf (`code`
998        // field on `Diagnostic`); the analyser glues on the namespace.
999        let with_code = DiagnosticKind::Handler {
1000            namespace: "acme".into(),
1001            code: Some("task-stuck".into()),
1002        };
1003        assert_eq!(with_code.code(), "acme.task-stuck");
1004        // Code-less handler diagnostic gets a per-namespace fallback
1005        // — users can target it as `"acme.diagnostic" = "warn"` rather
1006        // than a single global literal.
1007        let without_code = DiagnosticKind::Handler {
1008            namespace: "acme".into(),
1009            code: None,
1010        };
1011        assert_eq!(without_code.code(), "acme.diagnostic");
1012    }
1013
1014    #[test]
1015    fn apply_rules_matches_extension_code_via_side_channel() {
1016        // End-to-end: handler emits `acme.foo`, user configured
1017        // `"acme.foo" = "allow"` in `[diagnostics.rules]` (now
1018        // captured into the LSP's `extension_diagnostic_rules`
1019        // side-channel by the `on_unknown_key` callback rather than
1020        // landing in a `#[serde(flatten)] extra` map); diagnostic
1021        // gets dropped.
1022        use std::collections::BTreeMap;
1023        // The closure mirrors `LoadedLexConfig::lookup_diagnostic_rule`:
1024        // built-in first, side-channel second.
1025        let lookup = |code: &str, side: &BTreeMap<String, lex_config::RuleConfig>| {
1026            DiagnosticsRulesConfig::default()
1027                .lookup_by_code(code)
1028                .cloned()
1029                .or_else(|| side.get(code).cloned())
1030        };
1031
1032        let side: BTreeMap<String, lex_config::RuleConfig> = [(
1033            "acme.foo".to_string(),
1034            lex_config::RuleConfig::Bare(Severity::Allow),
1035        )]
1036        .into_iter()
1037        .collect();
1038        let mut diags = vec![dummy_diag(
1039            DiagnosticKind::Handler {
1040                namespace: "acme".into(),
1041                code: Some("foo".into()),
1042            },
1043            DiagnosticSeverity::Error,
1044        )];
1045        apply_rules(&mut diags, |code| lookup(code, &side));
1046        assert!(diags.is_empty(), "allow drops the extension diagnostic");
1047
1048        // `warn` keeps the intrinsic severity (Error stays Error).
1049        let side: BTreeMap<String, lex_config::RuleConfig> = [(
1050            "acme.foo".to_string(),
1051            lex_config::RuleConfig::Bare(Severity::Warn),
1052        )]
1053        .into_iter()
1054        .collect();
1055        let mut diags = vec![dummy_diag(
1056            DiagnosticKind::Handler {
1057                namespace: "acme".into(),
1058                code: Some("foo".into()),
1059            },
1060            DiagnosticSeverity::Error,
1061        )];
1062        apply_rules(&mut diags, |code| lookup(code, &side));
1063        assert_eq!(diags.len(), 1);
1064        assert_eq!(
1065            diags[0].severity,
1066            DiagnosticSeverity::Error,
1067            "warn preserves the handler's intrinsic severity"
1068        );
1069
1070        // `deny` is a no-op when the intrinsic is already Error, but
1071        // still keeps the diagnostic — symmetry with built-ins.
1072        let side: BTreeMap<String, lex_config::RuleConfig> = [(
1073            "acme.foo".to_string(),
1074            lex_config::RuleConfig::Bare(Severity::Deny),
1075        )]
1076        .into_iter()
1077        .collect();
1078        let mut diags = vec![dummy_diag(
1079            DiagnosticKind::Handler {
1080                namespace: "acme".into(),
1081                code: Some("foo".into()),
1082            },
1083            DiagnosticSeverity::Error,
1084        )];
1085        apply_rules(&mut diags, |code| lookup(code, &side));
1086        assert_eq!(diags.len(), 1);
1087        assert_eq!(diags[0].severity, DiagnosticSeverity::Error);
1088
1089        // A configured rule whose code doesn't match the emitted one
1090        // passes the diagnostic through untouched.
1091        let side: BTreeMap<String, lex_config::RuleConfig> = [(
1092            "acme.other".to_string(),
1093            lex_config::RuleConfig::Bare(Severity::Allow),
1094        )]
1095        .into_iter()
1096        .collect();
1097        let mut diags = vec![dummy_diag(
1098            DiagnosticKind::Handler {
1099                namespace: "acme".into(),
1100                code: Some("foo".into()),
1101            },
1102            DiagnosticSeverity::Warning,
1103        )];
1104        apply_rules(&mut diags, |code| lookup(code, &side));
1105        assert_eq!(diags.len(), 1);
1106        assert_eq!(diags[0].severity, DiagnosticSeverity::Warning);
1107    }
1108
1109    #[test]
1110    fn apply_rules_allow_drops_diagnostic() {
1111        let mut diags = vec![dummy_diag(
1112            DiagnosticKind::MissingFootnoteDefinition,
1113            DiagnosticSeverity::Error,
1114        )];
1115        let rules = DiagnosticsRulesConfig {
1116            missing_footnote: lex_config::RuleConfig::Bare(Severity::Allow),
1117            ..Default::default()
1118        };
1119        apply_rules(&mut diags, |code| rules.lookup_by_code(code).cloned());
1120        assert!(diags.is_empty(), "allow should drop the diagnostic");
1121    }
1122
1123    #[test]
1124    fn apply_rules_deny_upgrades_to_error() {
1125        let mut diags = vec![dummy_diag(
1126            DiagnosticKind::TableInconsistentColumns,
1127            DiagnosticSeverity::Warning,
1128        )];
1129        let rules = DiagnosticsRulesConfig {
1130            table_inconsistent_columns: lex_config::RuleConfig::Bare(Severity::Deny),
1131            ..Default::default()
1132        };
1133        apply_rules(&mut diags, |code| rules.lookup_by_code(code).cloned());
1134        assert_eq!(diags.len(), 1);
1135        assert_eq!(diags[0].severity, DiagnosticSeverity::Error);
1136    }
1137
1138    #[test]
1139    fn apply_rules_warn_keeps_intrinsic_severity() {
1140        let mut diags = vec![dummy_diag(
1141            DiagnosticKind::TableInconsistentColumns,
1142            DiagnosticSeverity::Warning,
1143        )];
1144        let rules = DiagnosticsRulesConfig {
1145            table_inconsistent_columns: lex_config::RuleConfig::Bare(Severity::Warn),
1146            ..Default::default()
1147        };
1148        apply_rules(&mut diags, |code| rules.lookup_by_code(code).cloned());
1149        assert_eq!(diags.len(), 1);
1150        assert_eq!(
1151            diags[0].severity,
1152            DiagnosticSeverity::Warning,
1153            "warn should not change the intrinsic severity"
1154        );
1155    }
1156
1157    #[test]
1158    fn apply_rules_unknown_code_is_passthrough() {
1159        // An extension-emitted diagnostic with a code the registry
1160        // does not know about must pass through unmodified. The
1161        // handler's `code` is the bare leaf — the analyser glues on
1162        // `acme.` to produce wire `acme.unknown`.
1163        let mut diags = vec![dummy_diag(
1164            DiagnosticKind::Handler {
1165                namespace: "acme".into(),
1166                code: Some("unknown".into()),
1167            },
1168            DiagnosticSeverity::Warning,
1169        )];
1170        let rules = DiagnosticsRulesConfig::default();
1171        apply_rules(&mut diags, |code| rules.lookup_by_code(code).cloned());
1172        assert_eq!(diags.len(), 1, "unknown codes should pass through");
1173        assert_eq!(diags[0].severity, DiagnosticSeverity::Warning);
1174    }
1175
1176    #[test]
1177    fn apply_rules_preserves_order_of_kept_diagnostics() {
1178        // Mixed stream: one to drop, one to keep, one to upgrade.
1179        let mut diags = vec![
1180            dummy_diag(
1181                DiagnosticKind::MissingFootnoteDefinition,
1182                DiagnosticSeverity::Error,
1183            ),
1184            dummy_diag(
1185                DiagnosticKind::UnusedFootnoteDefinition,
1186                DiagnosticSeverity::Warning,
1187            ),
1188            dummy_diag(
1189                DiagnosticKind::TableInconsistentColumns,
1190                DiagnosticSeverity::Warning,
1191            ),
1192        ];
1193        let rules = DiagnosticsRulesConfig {
1194            missing_footnote: lex_config::RuleConfig::Bare(Severity::Allow),
1195            table_inconsistent_columns: lex_config::RuleConfig::Bare(Severity::Deny),
1196            ..Default::default()
1197        };
1198        apply_rules(&mut diags, |code| rules.lookup_by_code(code).cloned());
1199        assert_eq!(diags.len(), 2);
1200        assert_eq!(diags[0].kind, DiagnosticKind::UnusedFootnoteDefinition);
1201        assert_eq!(diags[0].severity, DiagnosticSeverity::Warning);
1202        assert_eq!(diags[1].kind, DiagnosticKind::TableInconsistentColumns);
1203        assert_eq!(diags[1].severity, DiagnosticSeverity::Error);
1204    }
1205}