Skip to main content

lex_analysis/
diagnostics.rs

1use crate::inline::extract_references;
2use lex_config::{DiagnosticsRulesConfig, RuleConfig, Severity};
3use lex_core::lex::ast::{
4    Annotation, ContentItem, Document, Range, Session, Table, TableRow, TextContent,
5};
6use lex_core::lex::inlines::ReferenceType;
7use lex_extension_host::Registry;
8use std::borrow::Cow;
9use std::collections::HashSet;
10
11#[derive(Debug, Clone, PartialEq, Eq)]
12pub enum DiagnosticKind {
13    MissingFootnoteDefinition,
14    UnusedFootnoteDefinition,
15    TableInconsistentColumns,
16    /// A label invocation failed schema pre-validation before the
17    /// handler was dispatched. The variant carries which of the
18    /// pre-validation checks tripped.
19    SchemaValidation(SchemaValidationKind),
20    /// A diagnostic emitted by a registered extension handler. The
21    /// `namespace` field is the namespace name (the part before the
22    /// first `.`, e.g., `"acme"` for label `"acme.task"`) — `lex-lsp`
23    /// surfaces it as the diagnostic `source: "lex:<namespace>"` so
24    /// editors can filter by extension.
25    ///
26    /// `code` carries the **bare leaf** the handler supplied (the
27    /// `code` field on `lex_extension::Diagnostic`), *not* the wire
28    /// form. The analyser glues on the namespace prefix in
29    /// [`DiagnosticKind::code`] to produce the wire shape per spec §9
30    /// (`<namespace>.<leaf>`, e.g. `"acme.foo"`; or the per-namespace
31    /// fallback `"acme.diagnostic"` when the handler set `None`).
32    /// Passing an already-prefixed value here would produce a
33    /// double-prefixed wire code (`"acme.acme.foo"`) — handlers should
34    /// supply just the leaf.
35    Handler {
36        namespace: String,
37        code: Option<String>,
38    },
39    /// A label uses the reserved `doc.*` prefix (forbidden under
40    /// `comms/specs/general.lex` §4.1). PR 4 of #584 emits this when
41    /// permissive-mode parse lets the label flow through; the LSP
42    /// then offers a quickfix to rewrite to the blessed shortcut
43    /// (`doc.table` → `table`, `doc.image` → `image`, etc.).
44    ForbiddenLabelPrefix,
45    /// A `lex.*` literal that doesn't match any registered canonical
46    /// in [`lex_core::lex::builtins::CANONICAL_LABELS`]. Typically a
47    /// typo (`lex.fooar`) or a label authored against a future
48    /// version of the core schemas.
49    UnknownLexCanonical,
50}
51
/// Severity attached to every analysis-emitted diagnostic.
///
/// The analyser fills this in for each diagnostic, and `lex-lsp` reads
/// `diag.severity` as-is when mapping onto the LSP wire. (The LSP layer
/// used to derive severity from `DiagnosticKind`; that mapping moved
/// upstream once extension-emitted diagnostics needed per-instance
/// severities.)
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DiagnosticSeverity {
    Error,
    Warning,
    Info,
    Hint,
}
65
/// The individual schema pre-validation checks the analyser performs
/// itself before handing a label to its handler. Wire spec / proposal
/// §13.2.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum SchemaValidationKind {
    /// The namespace is registered, but its schema set does not declare
    /// this exact label. The walker emits it when
    /// `Registry::schema_for(label)` returns `None` while
    /// `is_namespace_healthy(<ns prefix>)` is `true`. This separates a
    /// "typo / out-of-version label" (this variant, surfaced as a
    /// document error) from an "unknown namespace" (silent pass-through
    /// per the bounded-extensibility rule).
    UnknownLabel,
    MissingParam,
    ParamTypeMismatch,
    BadAttachment,
    BodyShapeMismatch,
}

impl SchemaValidationKind {
    /// Wire code for this check; identical to the corresponding
    /// `[diagnostics.rules.schema]` field name in `.lex.toml`.
    pub fn code(&self) -> &'static str {
        match *self {
            Self::UnknownLabel => "schema.unknown-label",
            Self::MissingParam => "schema.missing-param",
            Self::ParamTypeMismatch => "schema.param-type-mismatch",
            Self::BadAttachment => "schema.bad-attachment",
            Self::BodyShapeMismatch => "schema.body-shape-mismatch",
        }
    }
}
97
98impl DiagnosticKind {
99    /// The on-the-wire code for this diagnostic kind. The same value
100    /// travels in `lsp_types::Diagnostic.code` and is the key the
101    /// `[diagnostics.rules]` block in `.lex.toml` matches against
102    /// (see [`DiagnosticsRulesConfig::lookup_by_code`]).
103    ///
104    /// For the `Handler` variant — extension-emitted diagnostics —
105    /// this returns the namespace-prefixed code: `"acme.foo"` for
106    /// `Handler { namespace: "acme", code: Some("foo") }`, or
107    /// `"acme.diagnostic"` when the handler omitted a code. The
108    /// namespace prefix is what `[diagnostics.rules]` keys match
109    /// against (spec §9), and the per-namespace `.diagnostic` fallback
110    /// gives users one knob per namespace for code-less handler
111    /// diagnostics rather than a single global `"handler.diagnostic"`.
112    ///
113    /// Returns `Cow<'static, str>` so built-in variants borrow a
114    /// static string (no allocation) while the `Handler` variant owns
115    /// the `format!`-produced result. `apply_rules` runs on every
116    /// document change in the LSP, so avoiding per-built-in allocations
117    /// matters.
118    pub fn code(&self) -> Cow<'static, str> {
119        match self {
120            DiagnosticKind::MissingFootnoteDefinition => "missing-footnote".into(),
121            DiagnosticKind::UnusedFootnoteDefinition => "unused-footnote".into(),
122            DiagnosticKind::TableInconsistentColumns => "table-inconsistent-columns".into(),
123            DiagnosticKind::SchemaValidation(kind) => kind.code().into(),
124            DiagnosticKind::Handler { namespace, code } => match code {
125                Some(c) => format!("{namespace}.{c}").into(),
126                None => format!("{namespace}.diagnostic").into(),
127            },
128            DiagnosticKind::ForbiddenLabelPrefix => "forbidden-label-prefix".into(),
129            DiagnosticKind::UnknownLexCanonical => "unknown-lex-canonical".into(),
130        }
131    }
132}
133
134/// Apply a `[diagnostics.rules]` configuration to a stream of analyser
135/// diagnostics in place. Drops diagnostics whose resolved severity is
136/// `allow`, and remaps the remaining diagnostics' `severity` field:
137///
138/// - `warn` → the diagnostic's intrinsic severity stays unchanged.
139/// - `deny` → severity is upgraded to `Error`.
140///
141/// `lookup_rule` is the resolution function — typically
142/// [`LoadedLexConfig::lookup_diagnostic_rule`](lex_config::LoadedLexConfig::lookup_diagnostic_rule),
143/// which consults the named built-in fields first and the
144/// extension-rules side-channel second. Diagnostics whose code has no
145/// matching entry on either surface pass through untouched at their
146/// intrinsic severity.
147pub fn apply_rules<F>(diagnostics: &mut Vec<AnalysisDiagnostic>, lookup_rule: F)
148where
149    F: Fn(&str) -> Option<RuleConfig>,
150{
151    diagnostics.retain_mut(|diag| {
152        let code = diag.kind.code();
153        let Some(rule) = lookup_rule(&code) else {
154            return true;
155        };
156        match rule.severity() {
157            Severity::Allow => false,
158            Severity::Warn => true,
159            Severity::Deny => {
160                diag.severity = DiagnosticSeverity::Error;
161                true
162            }
163        }
164    });
165}
166
167#[derive(Debug, Clone, PartialEq, Eq)]
168pub struct AnalysisDiagnostic {
169    pub range: Range,
170    /// Severity, set by the analyser for every diagnostic it
171    /// produces. `lex-lsp` reads this directly when mapping onto LSP
172    /// wire severities; the kind-to-severity mapping that lived in
173    /// `to_lsp_diagnostic` is no longer authoritative.
174    pub severity: DiagnosticSeverity,
175    pub kind: DiagnosticKind,
176    pub message: String,
177}
178
179/// Run the analyser without an extension registry — equivalent to
180/// running with an empty registry. Provided for callers that haven't
181/// adopted the extension system yet.
182pub fn analyze(document: &Document) -> Vec<AnalysisDiagnostic> {
183    let registry = Registry::new();
184    analyze_with_registry(document, &registry)
185}
186
187/// Run the analyser with a populated extension registry. Labels whose
188/// namespace is registered get pre-validated against their schema and,
189/// if pre-validation passes, dispatched to the handler's `on_validate`
190/// hook. Handler-emitted diagnostics are merged into the same stream as
191/// the built-in checks.
192pub fn analyze_with_registry(document: &Document, registry: &Registry) -> Vec<AnalysisDiagnostic> {
193    let mut diagnostics = Vec::new();
194    check_footnotes(document, &mut diagnostics);
195    check_tables(document, &mut diagnostics);
196    check_labels(document, &mut diagnostics);
197    crate::label_dispatch::dispatch_labels(document, registry, &mut diagnostics);
198    diagnostics
199}
200
201/// Run the analyser with both an extension registry and a
202/// `[diagnostics.rules]` configuration. The configuration is applied
203/// after all checks run, so rule overrides ([`Severity::Allow`] /
204/// [`Severity::Deny`]) take effect uniformly across the diagnostic
205/// stream.
206pub fn analyze_with_rules(
207    document: &Document,
208    registry: &Registry,
209    rules: &DiagnosticsRulesConfig,
210) -> Vec<AnalysisDiagnostic> {
211    let mut diagnostics = analyze_with_registry(document, registry);
212    apply_rules(&mut diagnostics, |code| rules.lookup_by_code(code).cloned());
213    diagnostics
214}
215
216/// Walk every label site in the document and re-classify via
217/// [`classify_label`](lex_core::lex::assembling::stages::normalize_labels::classify_label).
218/// Emits diagnostics for sites that strict-mode parsing would have
219/// rejected — `doc.*` (forbidden) and unknown `lex.*` (not a
220/// registered canonical). The LSP-side permissive parse keeps the
221/// AST building so these surface as in-place diagnostics rather than
222/// as a wholesale parse failure.
223fn check_labels(document: &Document, diagnostics: &mut Vec<AnalysisDiagnostic>) {
224    use lex_core::lex::assembling::stages::normalize_labels::{
225        classify_label, RejectReason, Resolution,
226    };
227    use lex_core::lex::ast::Label;
228
229    fn emit(label: &Label, diagnostics: &mut Vec<AnalysisDiagnostic>) {
230        if let Resolution::Rejected(reason) = classify_label(&label.value) {
231            // Reuse the normative wording from `RejectReason::message()`
232            // so the strict-mode parser error and the permissive-mode
233            // analysis diagnostic stay literally identical — no chance
234            // of wording drift between the two surfaces.
235            let message = reason.message();
236            let kind = match reason {
237                RejectReason::Forbidden { .. } => DiagnosticKind::ForbiddenLabelPrefix,
238                RejectReason::UnknownCanonical { .. } => DiagnosticKind::UnknownLexCanonical,
239            };
240            diagnostics.push(AnalysisDiagnostic {
241                range: label.location.clone(),
242                severity: DiagnosticSeverity::Error,
243                kind,
244                message,
245            });
246        }
247    }
248
249    // Unified dispatch: every ContentItem flows through `walk_item`,
250    // which emits the type-specific label sites (annotation label,
251    // verbatim closer label, table cells/footnotes) exactly once and
252    // then defers to `attached_annotations` + `item.children()` for
253    // the uniform recursion. The earlier shape had type-specific
254    // walkers (`walk_annotation`, `walk_verbatim`, `walk_table`) that
255    // descended on their own and then `walk_item` descended again —
256    // duplicate-walk regression caught by Copilot's review on PR 589.
257    fn walk_item(item: &ContentItem, diagnostics: &mut Vec<AnalysisDiagnostic>) {
258        match item {
259            ContentItem::Annotation(a) => emit(&a.data.label, diagnostics),
260            ContentItem::VerbatimBlock(v) => emit(&v.closing_data.label, diagnostics),
261            ContentItem::Table(t) => {
262                for row in t.header_rows.iter().chain(t.body_rows.iter()) {
263                    for cell in &row.cells {
264                        for child in cell.children.iter() {
265                            walk_item(child, diagnostics);
266                        }
267                    }
268                }
269                if let Some(footnotes) = t.footnotes.as_ref() {
270                    for ann in footnotes.annotations() {
271                        walk_annotation(ann, diagnostics);
272                    }
273                    for fn_item in footnotes.items.iter() {
274                        walk_item(fn_item, diagnostics);
275                    }
276                }
277            }
278            _ => {}
279        }
280        // Attached annotations (sessions, paragraphs, lists, list
281        // items, verbatim blocks, tables — see `attached_annotations`).
282        if let Some(attached) = attached_annotations(item) {
283            for annotation in attached {
284                walk_annotation(annotation, diagnostics);
285            }
286        }
287        // Generic child descent. For ContentItem::Annotation,
288        // `item.children()` returns the annotation's body children, so
289        // type-specific walking of nested annotations is not needed.
290        if let Some(children) = item.children() {
291            for child in children {
292                walk_item(child, diagnostics);
293            }
294        }
295    }
296
297    fn walk_annotation(annotation: &Annotation, diagnostics: &mut Vec<AnalysisDiagnostic>) {
298        emit(&annotation.data.label, diagnostics);
299        for child in annotation.children.iter() {
300            walk_item(child, diagnostics);
301        }
302    }
303
304    fn walk_session(session: &Session, diagnostics: &mut Vec<AnalysisDiagnostic>) {
305        for annotation in session.annotations() {
306            walk_annotation(annotation, diagnostics);
307        }
308        for child in &session.children {
309            walk_item(child, diagnostics);
310        }
311    }
312
313    fn attached_annotations(item: &ContentItem) -> Option<&[Annotation]> {
314        match item {
315            ContentItem::Session(s) => Some(s.annotations()),
316            ContentItem::Paragraph(p) => Some(p.annotations()),
317            ContentItem::Definition(d) => Some(d.annotations()),
318            ContentItem::List(l) => Some(l.annotations()),
319            ContentItem::ListItem(li) => Some(li.annotations()),
320            ContentItem::VerbatimBlock(v) => Some(v.annotations()),
321            ContentItem::Table(t) => Some(t.annotations()),
322            _ => None,
323        }
324    }
325
326    // Document-level annotations.
327    for annotation in document.annotations() {
328        walk_annotation(annotation, diagnostics);
329    }
330    // Root session walks.
331    walk_session(&document.root, diagnostics);
332}
333
334fn check_footnotes(document: &Document, diagnostics: &mut Vec<AnalysisDiagnostic>) {
335    // Numbered definitions reachable from outside any table: :: notes ::
336    // annotated lists at document or session scope.
337    let outer_defs: HashSet<u32> = crate::utils::collect_footnote_definitions(document)
338        .into_iter()
339        .filter_map(|(label, _)| label.parse::<u32>().ok())
340        .collect();
341
342    // References outside tables resolve to `outer_defs`; references inside a
343    // table resolve first to that table's own positional footnote list
344    // (`table.footnotes`) and then fall back to `outer_defs`.
345    if let Some(title) = &document.title {
346        check_text(&title.content, &outer_defs, diagnostics);
347    }
348    for annotation in document.annotations() {
349        check_annotation(annotation, &outer_defs, diagnostics);
350    }
351    check_session(&document.root, &outer_defs, diagnostics);
352}
353
354fn check_session(
355    session: &Session,
356    defs: &HashSet<u32>,
357    diagnostics: &mut Vec<AnalysisDiagnostic>,
358) {
359    check_text(&session.title, defs, diagnostics);
360    for annotation in session.annotations() {
361        check_annotation(annotation, defs, diagnostics);
362    }
363    for child in session.children.iter() {
364        check_content(child, defs, diagnostics);
365    }
366}
367
368fn check_content(
369    item: &ContentItem,
370    defs: &HashSet<u32>,
371    diagnostics: &mut Vec<AnalysisDiagnostic>,
372) {
373    match item {
374        ContentItem::Paragraph(p) => {
375            for line in &p.lines {
376                if let ContentItem::TextLine(tl) = line {
377                    check_text(&tl.content, defs, diagnostics);
378                }
379            }
380            for annotation in p.annotations() {
381                check_annotation(annotation, defs, diagnostics);
382            }
383        }
384        ContentItem::Session(s) => check_session(s, defs, diagnostics),
385        ContentItem::List(list) => {
386            for annotation in list.annotations() {
387                check_annotation(annotation, defs, diagnostics);
388            }
389            for entry in &list.items {
390                if let ContentItem::ListItem(li) = entry {
391                    for text in &li.text {
392                        check_text(text, defs, diagnostics);
393                    }
394                    for annotation in li.annotations() {
395                        check_annotation(annotation, defs, diagnostics);
396                    }
397                    for child in li.children.iter() {
398                        check_content(child, defs, diagnostics);
399                    }
400                }
401            }
402        }
403        ContentItem::Definition(def) => {
404            check_text(&def.subject, defs, diagnostics);
405            for annotation in def.annotations() {
406                check_annotation(annotation, defs, diagnostics);
407            }
408            for child in def.children.iter() {
409                check_content(child, defs, diagnostics);
410            }
411        }
412        ContentItem::Annotation(a) => check_annotation(a, defs, diagnostics),
413        ContentItem::VerbatimBlock(v) => {
414            check_text(&v.subject, defs, diagnostics);
415            for annotation in v.annotations() {
416                check_annotation(annotation, defs, diagnostics);
417            }
418        }
419        ContentItem::Table(table) => check_table(table, defs, diagnostics),
420        _ => {}
421    }
422}
423
424fn check_annotation(
425    annotation: &Annotation,
426    defs: &HashSet<u32>,
427    diagnostics: &mut Vec<AnalysisDiagnostic>,
428) {
429    for child in annotation.children.iter() {
430        check_content(child, defs, diagnostics);
431    }
432}
433
434fn check_table(
435    table: &Table,
436    outer_defs: &HashSet<u32>,
437    diagnostics: &mut Vec<AnalysisDiagnostic>,
438) {
439    // Extend the in-scope definitions with the table's positional footnote
440    // list. The table's own numbered items shadow nothing — they just add
441    // table-local numbers that references inside this table may resolve to.
442    // Fast path: most tables have no footnotes, so reuse `outer_defs` rather
443    // than cloning it into a new `HashSet` for every such table.
444    let table_defs = table_footnote_numbers(table);
445    if table_defs.is_empty() {
446        check_table_text(table, outer_defs, diagnostics);
447        return;
448    }
449    let mut scope = outer_defs.clone();
450    scope.extend(table_defs);
451    check_table_text(table, &scope, diagnostics);
452}
453
454fn check_table_text(table: &Table, defs: &HashSet<u32>, diagnostics: &mut Vec<AnalysisDiagnostic>) {
455    check_text(&table.subject, defs, diagnostics);
456    for row in table.all_rows() {
457        for cell in &row.cells {
458            check_text(&cell.content, defs, diagnostics);
459        }
460    }
461    for annotation in table.annotations() {
462        check_annotation(annotation, defs, diagnostics);
463    }
464}
465
466fn table_footnote_numbers(table: &Table) -> HashSet<u32> {
467    let Some(list) = &table.footnotes else {
468        return HashSet::new();
469    };
470    let mut numbers = HashSet::new();
471    for entry in &list.items {
472        if let ContentItem::ListItem(li) = entry {
473            let label = li
474                .marker()
475                .trim()
476                .trim_end_matches(['.', ')', ':'].as_ref())
477                .trim();
478            if let Ok(n) = label.parse::<u32>() {
479                numbers.insert(n);
480            }
481        }
482    }
483    numbers
484}
485
486fn check_text(text: &TextContent, defs: &HashSet<u32>, diagnostics: &mut Vec<AnalysisDiagnostic>) {
487    for reference in extract_references(text) {
488        if let ReferenceType::FootnoteNumber { number } = reference.reference_type {
489            if !defs.contains(&number) {
490                diagnostics.push(AnalysisDiagnostic {
491                    range: reference.range,
492                    severity: DiagnosticSeverity::Error,
493                    kind: DiagnosticKind::MissingFootnoteDefinition,
494                    message: format!(
495                        "Footnote [{number}] has no matching footnote definition in scope"
496                    ),
497                });
498            }
499        }
500    }
501}
502
503fn check_tables(document: &Document, diagnostics: &mut Vec<AnalysisDiagnostic>) {
504    visit_tables_in_session(&document.root, diagnostics);
505}
506
507fn visit_tables_in_session(session: &Session, diagnostics: &mut Vec<AnalysisDiagnostic>) {
508    for child in session.children.iter() {
509        visit_tables_in_content(child, diagnostics);
510    }
511}
512
513fn visit_tables_in_content(item: &ContentItem, diagnostics: &mut Vec<AnalysisDiagnostic>) {
514    match item {
515        ContentItem::Table(table) => check_table_columns(table, diagnostics),
516        ContentItem::Session(session) => visit_tables_in_session(session, diagnostics),
517        ContentItem::Definition(def) => {
518            for child in def.children.iter() {
519                visit_tables_in_content(child, diagnostics);
520            }
521        }
522        ContentItem::List(list) => {
523            for entry in &list.items {
524                if let ContentItem::ListItem(li) = entry {
525                    for child in li.children.iter() {
526                        visit_tables_in_content(child, diagnostics);
527                    }
528                }
529            }
530        }
531        ContentItem::Annotation(ann) => {
532            for child in ann.children.iter() {
533                visit_tables_in_content(child, diagnostics);
534            }
535        }
536        _ => {}
537    }
538}
539
540/// Check that all rows in a table have the same effective column count.
541///
542/// The effective width of a row accounts for both colspans of its own cells
543/// and rowspan carry-over from cells in prior rows that extend into it.
544/// Rows with different effective widths indicate a structural error (missing
545/// or extra cells).
546fn check_table_columns(table: &Table, diagnostics: &mut Vec<AnalysisDiagnostic>) {
547    let rows: Vec<_> = table.all_rows().collect();
548    if rows.len() < 2 {
549        return;
550    }
551
552    let widths = compute_row_widths(&rows);
553    let expected = widths[0];
554    for (i, &width) in widths.iter().enumerate().skip(1) {
555        if width != expected {
556            diagnostics.push(AnalysisDiagnostic {
557                range: rows[i].location.clone(),
558                severity: DiagnosticSeverity::Warning,
559                kind: DiagnosticKind::TableInconsistentColumns,
560                message: format!(
561                    "Row has {width} columns, expected {expected} (matching first row)"
562                ),
563            });
564        }
565    }
566}
567
568/// Simulate the virtual table grid to compute each row's effective width.
569///
570/// `carry[col]` tracks how many more rows (including the current one) a cell
571/// placed in a prior row still occupies column `col`. Own cells skip columns
572/// where `carry[col] > 0` (those are held by a cell from above via rowspan).
573fn compute_row_widths(rows: &[&TableRow]) -> Vec<usize> {
574    let mut carry: Vec<usize> = Vec::new();
575    let mut widths = Vec::with_capacity(rows.len());
576
577    for row in rows {
578        let mut col = 0;
579        for cell in &row.cells {
580            while col < carry.len() && carry[col] > 0 {
581                col += 1;
582            }
583            let end = col + cell.colspan;
584            if end > carry.len() {
585                carry.resize(end, 0);
586            }
587            for slot in carry.iter_mut().take(end).skip(col) {
588                *slot = cell.rowspan;
589            }
590            col = end;
591        }
592
593        let width = carry
594            .iter()
595            .rposition(|&r| r > 0)
596            .map(|i| i + 1)
597            .unwrap_or(0);
598        widths.push(width);
599
600        // Columns at or beyond `width` are guaranteed 0 (that's how width is
601        // defined), so limit the decrement to the active range and drop the
602        // trailing zeros to keep `carry` proportional to the live grid.
603        for c in carry.iter_mut().take(width) {
604            if *c > 0 {
605                *c -= 1;
606            }
607        }
608        carry.truncate(width);
609    }
610
611    widths
612}
613
614#[cfg(test)]
615mod tests {
616    use super::*;
617    use lex_core::lex::parsing::process_full_permissive;
618    use lex_core::lex::testing::lexplore::Lexplore;
619
620    fn footnote_diags(doc: &Document) -> Vec<AnalysisDiagnostic> {
621        analyze(doc)
622            .into_iter()
623            .filter(|d| d.kind == DiagnosticKind::MissingFootnoteDefinition)
624            .collect()
625    }
626
627    fn label_diags(source: &str) -> Vec<AnalysisDiagnostic> {
628        let doc = process_full_permissive(source).expect("permissive parse");
629        analyze(&doc)
630            .into_iter()
631            .filter(|d| {
632                matches!(
633                    d.kind,
634                    DiagnosticKind::ForbiddenLabelPrefix | DiagnosticKind::UnknownLexCanonical
635                )
636            })
637            .collect()
638    }
639
640    #[test]
641    fn check_labels_emits_for_doc_prefix() {
642        let diags = label_diags(":: doc.table :: x\n\nBody.\n");
643        assert_eq!(diags.len(), 1, "expected 1 forbidden-prefix diagnostic");
644        assert_eq!(diags[0].kind, DiagnosticKind::ForbiddenLabelPrefix);
645        assert_eq!(diags[0].severity, DiagnosticSeverity::Error);
646        assert!(
647            diags[0].message.contains("doc.table") && diags[0].message.contains("reserved"),
648            "message names the offending prefix; got: {}",
649            diags[0].message
650        );
651    }
652
653    #[test]
654    fn check_labels_emits_for_unknown_lex_canonical() {
655        let diags = label_diags(":: lex.foobar :: x\n\nBody.\n");
656        assert_eq!(diags.len(), 1, "expected 1 unknown-canonical diagnostic");
657        assert_eq!(diags[0].kind, DiagnosticKind::UnknownLexCanonical);
658        assert_eq!(diags[0].severity, DiagnosticSeverity::Error);
659        assert!(
660            diags[0].message.contains("lex.foobar"),
661            "message names the offending label; got: {}",
662            diags[0].message
663        );
664    }
665
666    #[test]
667    fn check_labels_silent_on_accepted_forms() {
668        // Shortcut, prefix-stripped, canonical, and community labels
669        // all accept silently — analysis only flags the two reject
670        // categories from `classify_label`.
671        let sources = [
672            ":: author :: Alice\n\nBody.\n",
673            ":: metadata.author :: Alice\n\nBody.\n",
674            ":: lex.metadata.author :: Alice\n\nBody.\n",
675            ":: acme.task :: x\n\nBody.\n",
676        ];
677        for src in sources {
678            let diags = label_diags(src);
679            assert!(
680                diags.is_empty(),
681                "no label diagnostics expected for {src:?}; got {diags:?}"
682            );
683        }
684    }
685
686    #[test]
687    fn check_labels_finds_verbatim_closer_violations() {
688        let diags =
689            label_diags("Table:\n    | a | b |\n    |---|---|\n    | 1 | 2 |\n:: doc.table ::\n");
690        assert_eq!(diags.len(), 1);
691        assert_eq!(diags[0].kind, DiagnosticKind::ForbiddenLabelPrefix);
692    }
693
694    #[test]
695    fn check_labels_emits_each_offending_site_exactly_once() {
696        // Regression for Copilot's PR 589 callout: the earlier
697        // walker shape descended into a node's children twice (once
698        // via the type-specific helper, once via the generic
699        // `walk_item` fallback), which produced duplicate
700        // diagnostics for any forbidden label nested inside another
701        // label-bearing site. Three nested + adjacent forbidden
702        // labels should produce exactly three diagnostics, not six.
703        let src = ":: doc.outer ::\n    :: doc.inner :: nested body\n\n:: doc.sibling :: x\n";
704        let diags = label_diags(src);
705        assert_eq!(
706            diags.len(),
707            3,
708            "exactly one diagnostic per offending site: {diags:?}"
709        );
710        for d in &diags {
711            assert_eq!(d.kind, DiagnosticKind::ForbiddenLabelPrefix);
712        }
713    }
714
715    #[test]
716    fn detects_missing_footnote_definition() {
717        let doc = Lexplore::footnotes(1).parse().unwrap();
718        let diags = analyze(&doc);
719        assert_eq!(diags.len(), 1);
720        assert_eq!(diags[0].kind, DiagnosticKind::MissingFootnoteDefinition);
721    }
722
723    #[test]
724    fn ignores_valid_footnote_with_notes_annotation() {
725        // :: notes :: annotated list at the document root provides the definitions
726        let doc = Lexplore::footnotes(2).parse().unwrap();
727        assert!(footnote_diags(&doc).is_empty());
728    }
729
730    #[test]
731    fn ignores_valid_list_footnote_in_session() {
732        // :: notes :: inside a session
733        let doc = Lexplore::footnotes(3).parse().unwrap();
734        assert!(footnote_diags(&doc).is_empty());
735    }
736
737    #[test]
738    fn list_without_notes_annotation_is_not_footnotes() {
739        // A "Notes" session without :: notes :: does NOT define footnotes
740        let doc = Lexplore::footnotes(4).parse().unwrap();
741        assert_eq!(footnote_diags(&doc).len(), 1);
742    }
743
744    fn table_diags(doc: &Document) -> Vec<AnalysisDiagnostic> {
745        analyze(doc)
746            .into_iter()
747            .filter(|d| d.kind == DiagnosticKind::TableInconsistentColumns)
748            .collect()
749    }
750
751    #[test]
752    fn detects_inconsistent_table_columns() {
753        // table-13: 3-col header, 2-col row, 3-col row — middle row is short.
754        let doc = Lexplore::table(13).parse().unwrap();
755        let diags = table_diags(&doc);
756        assert_eq!(diags.len(), 1);
757        assert!(diags[0].message.contains("2 columns"));
758        assert!(diags[0].message.contains("expected 3"));
759    }
760
761    #[test]
762    fn consistent_table_no_diagnostic() {
763        // table-01: minimal 2-column table, all rows consistent.
764        let doc = Lexplore::table(1).parse().unwrap();
765        assert!(table_diags(&doc).is_empty());
766    }
767
768    #[test]
769    fn table_with_rowspan_counts_carry_over() {
770        // table-17: rowspan via ^^ — effective widths remain consistent across rows.
771        let doc = Lexplore::table(17).parse().unwrap();
772        let diags = table_diags(&doc);
773        assert!(
774            diags.is_empty(),
775            "rowspan carry-over should not trigger inconsistent-columns, got: {diags:?}"
776        );
777    }
778
779    #[test]
780    fn table_with_colspan_and_rowspan_mixed() {
781        // table-18: combined >> colspan and ^^ rowspan; effective widths stay consistent.
782        let doc = Lexplore::table(18).parse().unwrap();
783        let diags = table_diags(&doc);
784        assert!(
785            diags.is_empty(),
786            "mixed colspan/rowspan should not trigger inconsistent-columns, got: {diags:?}"
787        );
788    }
789
790    #[test]
791    fn table_with_colspan_counts_effective_width() {
792        // table-04: colspan via >> contributes to effective width; all rows consistent.
793        let doc = Lexplore::table(4).parse().unwrap();
794        assert!(table_diags(&doc).is_empty());
795    }
796
797    #[test]
798    fn footnote_ref_in_table_cell_is_checked() {
799        // footnotes-09: table cell contains [1] but no footnote definition
800        // anywhere in scope — document, session, or table-local.
801        let doc = Lexplore::footnotes(9).parse().unwrap();
802        let diags = footnote_diags(&doc);
803        assert_eq!(diags.len(), 1);
804        assert!(diags[0].message.contains("[1]"));
805    }
806
807    #[test]
808    fn table_scoped_footnotes_resolve_cell_refs() {
809        // footnotes-11: cell refs [1] and [2] resolve to the table's own
810        // positional footnote list (no :: notes :: annotation needed).
811        let doc = Lexplore::footnotes(11).parse().unwrap();
812        let diags = footnote_diags(&doc);
813        assert!(
814            diags.is_empty(),
815            "table-scoped cell refs should resolve to table.footnotes, got: {diags:?}"
816        );
817    }
818
819    #[test]
820    fn table_scoped_footnotes_do_not_leak_out() {
821        // footnotes-12: a [1] ref in body text outside the table must NOT
822        // resolve to the table's own positional footnote list even when the
823        // numbers happen to match. The table's list is table-local.
824        let doc = Lexplore::footnotes(12).parse().unwrap();
825        let diags = footnote_diags(&doc);
826        assert_eq!(
827            diags.len(),
828            1,
829            "only the paragraph ref [1] should be unresolved, got: {diags:?}"
830        );
831        assert!(diags[0].message.contains("[1]"));
832    }
833
834    // ─────────────── apply_rules / DiagnosticKind::code ───────────────
835
836    fn dummy_diag(kind: DiagnosticKind, severity: DiagnosticSeverity) -> AnalysisDiagnostic {
837        AnalysisDiagnostic {
838            range: Range::default(),
839            severity,
840            kind,
841            message: "test".into(),
842        }
843    }
844
845    #[test]
846    fn diagnostic_kind_code_matches_lookup_for_every_builtin() {
847        // Drift test: every built-in DiagnosticKind variant must have a
848        // matching entry in DiagnosticsRulesConfig::lookup_by_code so
849        // configuration overrides reach every rule.
850        let rules = DiagnosticsRulesConfig::default();
851        for kind in [
852            DiagnosticKind::MissingFootnoteDefinition,
853            DiagnosticKind::UnusedFootnoteDefinition,
854            DiagnosticKind::TableInconsistentColumns,
855            DiagnosticKind::ForbiddenLabelPrefix,
856            DiagnosticKind::UnknownLexCanonical,
857            DiagnosticKind::SchemaValidation(SchemaValidationKind::UnknownLabel),
858            DiagnosticKind::SchemaValidation(SchemaValidationKind::MissingParam),
859            DiagnosticKind::SchemaValidation(SchemaValidationKind::ParamTypeMismatch),
860            DiagnosticKind::SchemaValidation(SchemaValidationKind::BadAttachment),
861            DiagnosticKind::SchemaValidation(SchemaValidationKind::BodyShapeMismatch),
862        ] {
863            let code = kind.code();
864            assert!(
865                rules.lookup_by_code(&code).is_some(),
866                "DiagnosticsRulesConfig is missing a field for built-in code {code:?} \
867                 — add it to lookup_by_code (and likely as a struct field too)"
868            );
869        }
870    }
871
872    #[test]
873    fn handler_code_carries_namespace_prefix() {
874        // Wire-shape contract (spec §9): the wire `code` is the
875        // namespace-prefixed form so a `.lex.toml` rule like
876        // `"acme.task-stuck" = "deny"` actually matches what the
877        // handler emitted. The handler supplies the bare leaf (`code`
878        // field on `Diagnostic`); the analyser glues on the namespace.
879        let with_code = DiagnosticKind::Handler {
880            namespace: "acme".into(),
881            code: Some("task-stuck".into()),
882        };
883        assert_eq!(with_code.code(), "acme.task-stuck");
884        // Code-less handler diagnostic gets a per-namespace fallback
885        // — users can target it as `"acme.diagnostic" = "warn"` rather
886        // than a single global literal.
887        let without_code = DiagnosticKind::Handler {
888            namespace: "acme".into(),
889            code: None,
890        };
891        assert_eq!(without_code.code(), "acme.diagnostic");
892    }
893
894    #[test]
895    fn apply_rules_matches_extension_code_via_side_channel() {
896        // End-to-end: handler emits `acme.foo`, user configured
897        // `"acme.foo" = "allow"` in `[diagnostics.rules]` (now
898        // captured into the LSP's `extension_diagnostic_rules`
899        // side-channel by the `on_unknown_key` callback rather than
900        // landing in a `#[serde(flatten)] extra` map); diagnostic
901        // gets dropped.
902        use std::collections::BTreeMap;
903        // The closure mirrors `LoadedLexConfig::lookup_diagnostic_rule`:
904        // built-in first, side-channel second.
905        let lookup = |code: &str, side: &BTreeMap<String, lex_config::RuleConfig>| {
906            DiagnosticsRulesConfig::default()
907                .lookup_by_code(code)
908                .cloned()
909                .or_else(|| side.get(code).cloned())
910        };
911
912        let side: BTreeMap<String, lex_config::RuleConfig> = [(
913            "acme.foo".to_string(),
914            lex_config::RuleConfig::Bare(Severity::Allow),
915        )]
916        .into_iter()
917        .collect();
918        let mut diags = vec![dummy_diag(
919            DiagnosticKind::Handler {
920                namespace: "acme".into(),
921                code: Some("foo".into()),
922            },
923            DiagnosticSeverity::Error,
924        )];
925        apply_rules(&mut diags, |code| lookup(code, &side));
926        assert!(diags.is_empty(), "allow drops the extension diagnostic");
927
928        // `warn` keeps the intrinsic severity (Error stays Error).
929        let side: BTreeMap<String, lex_config::RuleConfig> = [(
930            "acme.foo".to_string(),
931            lex_config::RuleConfig::Bare(Severity::Warn),
932        )]
933        .into_iter()
934        .collect();
935        let mut diags = vec![dummy_diag(
936            DiagnosticKind::Handler {
937                namespace: "acme".into(),
938                code: Some("foo".into()),
939            },
940            DiagnosticSeverity::Error,
941        )];
942        apply_rules(&mut diags, |code| lookup(code, &side));
943        assert_eq!(diags.len(), 1);
944        assert_eq!(
945            diags[0].severity,
946            DiagnosticSeverity::Error,
947            "warn preserves the handler's intrinsic severity"
948        );
949
950        // `deny` is a no-op when the intrinsic is already Error, but
951        // still keeps the diagnostic — symmetry with built-ins.
952        let side: BTreeMap<String, lex_config::RuleConfig> = [(
953            "acme.foo".to_string(),
954            lex_config::RuleConfig::Bare(Severity::Deny),
955        )]
956        .into_iter()
957        .collect();
958        let mut diags = vec![dummy_diag(
959            DiagnosticKind::Handler {
960                namespace: "acme".into(),
961                code: Some("foo".into()),
962            },
963            DiagnosticSeverity::Error,
964        )];
965        apply_rules(&mut diags, |code| lookup(code, &side));
966        assert_eq!(diags.len(), 1);
967        assert_eq!(diags[0].severity, DiagnosticSeverity::Error);
968
969        // A configured rule whose code doesn't match the emitted one
970        // passes the diagnostic through untouched.
971        let side: BTreeMap<String, lex_config::RuleConfig> = [(
972            "acme.other".to_string(),
973            lex_config::RuleConfig::Bare(Severity::Allow),
974        )]
975        .into_iter()
976        .collect();
977        let mut diags = vec![dummy_diag(
978            DiagnosticKind::Handler {
979                namespace: "acme".into(),
980                code: Some("foo".into()),
981            },
982            DiagnosticSeverity::Warning,
983        )];
984        apply_rules(&mut diags, |code| lookup(code, &side));
985        assert_eq!(diags.len(), 1);
986        assert_eq!(diags[0].severity, DiagnosticSeverity::Warning);
987    }
988
989    #[test]
990    fn apply_rules_allow_drops_diagnostic() {
991        let mut diags = vec![dummy_diag(
992            DiagnosticKind::MissingFootnoteDefinition,
993            DiagnosticSeverity::Error,
994        )];
995        let rules = DiagnosticsRulesConfig {
996            missing_footnote: lex_config::RuleConfig::Bare(Severity::Allow),
997            ..Default::default()
998        };
999        apply_rules(&mut diags, |code| rules.lookup_by_code(code).cloned());
1000        assert!(diags.is_empty(), "allow should drop the diagnostic");
1001    }
1002
1003    #[test]
1004    fn apply_rules_deny_upgrades_to_error() {
1005        let mut diags = vec![dummy_diag(
1006            DiagnosticKind::TableInconsistentColumns,
1007            DiagnosticSeverity::Warning,
1008        )];
1009        let rules = DiagnosticsRulesConfig {
1010            table_inconsistent_columns: lex_config::RuleConfig::Bare(Severity::Deny),
1011            ..Default::default()
1012        };
1013        apply_rules(&mut diags, |code| rules.lookup_by_code(code).cloned());
1014        assert_eq!(diags.len(), 1);
1015        assert_eq!(diags[0].severity, DiagnosticSeverity::Error);
1016    }
1017
1018    #[test]
1019    fn apply_rules_warn_keeps_intrinsic_severity() {
1020        let mut diags = vec![dummy_diag(
1021            DiagnosticKind::TableInconsistentColumns,
1022            DiagnosticSeverity::Warning,
1023        )];
1024        let rules = DiagnosticsRulesConfig {
1025            table_inconsistent_columns: lex_config::RuleConfig::Bare(Severity::Warn),
1026            ..Default::default()
1027        };
1028        apply_rules(&mut diags, |code| rules.lookup_by_code(code).cloned());
1029        assert_eq!(diags.len(), 1);
1030        assert_eq!(
1031            diags[0].severity,
1032            DiagnosticSeverity::Warning,
1033            "warn should not change the intrinsic severity"
1034        );
1035    }
1036
1037    #[test]
1038    fn apply_rules_unknown_code_is_passthrough() {
1039        // An extension-emitted diagnostic with a code the registry
1040        // does not know about must pass through unmodified. The
1041        // handler's `code` is the bare leaf — the analyser glues on
1042        // `acme.` to produce wire `acme.unknown`.
1043        let mut diags = vec![dummy_diag(
1044            DiagnosticKind::Handler {
1045                namespace: "acme".into(),
1046                code: Some("unknown".into()),
1047            },
1048            DiagnosticSeverity::Warning,
1049        )];
1050        let rules = DiagnosticsRulesConfig::default();
1051        apply_rules(&mut diags, |code| rules.lookup_by_code(code).cloned());
1052        assert_eq!(diags.len(), 1, "unknown codes should pass through");
1053        assert_eq!(diags[0].severity, DiagnosticSeverity::Warning);
1054    }
1055
1056    #[test]
1057    fn apply_rules_preserves_order_of_kept_diagnostics() {
1058        // Mixed stream: one to drop, one to keep, one to upgrade.
1059        let mut diags = vec![
1060            dummy_diag(
1061                DiagnosticKind::MissingFootnoteDefinition,
1062                DiagnosticSeverity::Error,
1063            ),
1064            dummy_diag(
1065                DiagnosticKind::UnusedFootnoteDefinition,
1066                DiagnosticSeverity::Warning,
1067            ),
1068            dummy_diag(
1069                DiagnosticKind::TableInconsistentColumns,
1070                DiagnosticSeverity::Warning,
1071            ),
1072        ];
1073        let rules = DiagnosticsRulesConfig {
1074            missing_footnote: lex_config::RuleConfig::Bare(Severity::Allow),
1075            table_inconsistent_columns: lex_config::RuleConfig::Bare(Severity::Deny),
1076            ..Default::default()
1077        };
1078        apply_rules(&mut diags, |code| rules.lookup_by_code(code).cloned());
1079        assert_eq!(diags.len(), 2);
1080        assert_eq!(diags[0].kind, DiagnosticKind::UnusedFootnoteDefinition);
1081        assert_eq!(diags[0].severity, DiagnosticSeverity::Warning);
1082        assert_eq!(diags[1].kind, DiagnosticKind::TableInconsistentColumns);
1083        assert_eq!(diags[1].severity, DiagnosticSeverity::Error);
1084    }
1085}