// lex_analysis/diagnostics.rs

1use crate::inline::extract_references;
2use lex_core::lex::ast::{
3    Annotation, ContentItem, Document, Range, Session, Table, TableRow, TextContent,
4};
5use lex_core::lex::inlines::ReferenceType;
6use lex_extension_host::Registry;
7use std::collections::HashSet;
8
9#[derive(Debug, Clone, PartialEq, Eq)]
10pub enum DiagnosticKind {
11    MissingFootnoteDefinition,
12    UnusedFootnoteDefinition,
13    TableInconsistentColumns,
14    /// A label invocation failed schema pre-validation before the
15    /// handler was dispatched. The variant carries which of the six
16    /// pre-validation checks tripped.
17    SchemaValidation(SchemaValidationKind),
18    /// A diagnostic emitted by a registered extension handler. The
19    /// `namespace` field is the namespace name (the part before the
20    /// first `.`, e.g., `"acme"` for label `"acme.task"`) — `lex-lsp`
21    /// surfaces it as the diagnostic `source: "lex:<namespace>"` so
22    /// editors can filter by extension. `code` mirrors the wire
23    /// `Diagnostic.code` field.
24    Handler {
25        namespace: String,
26        code: Option<String>,
27    },
28}
29
/// How serious an analysis diagnostic is.
///
/// The analyser sets a severity on every diagnostic it emits, and `lex-lsp`
/// reads `diag.severity` directly when translating to the LSP wire format.
/// Historically the LSP layer derived severity from `DiagnosticKind`; that
/// mapping moved upstream once extension-emitted diagnostics needed
/// per-instance severities.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiagnosticSeverity {
    Error,
    Warning,
    Info,
    Hint,
}
43
/// Identifies which schema pre-validation check rejected a label invocation.
/// The analyser runs these checks itself before dispatching to a handler
/// (wire spec / proposal §13.2).
// NOTE(review): the previous documentation counted six pre-validation checks,
// but only five variants are declared here — confirm whether one check is
// reported through a different path.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SchemaValidationKind {
    /// The label's namespace is registered, but that namespace's schema set
    /// does not declare this exact label. The walker reports it when
    /// `Registry::schema_for(label)` is `None` while
    /// `is_namespace_healthy(<ns prefix>)` is `true`.
    ///
    /// This separates a "typo / out-of-version label" (this variant, shown
    /// as a document error) from an "unknown namespace", which passes through
    /// silently per the bounded-extensibility rule.
    UnknownLabel,
    // The remaining variants are named after the check that failed; their
    // exact trigger conditions live in the dispatcher, not in this file.
    MissingParam,
    ParamTypeMismatch,
    BadAttachment,
    BodyShapeMismatch,
}
61
62#[derive(Debug, Clone, PartialEq, Eq)]
63pub struct AnalysisDiagnostic {
64    pub range: Range,
65    /// Severity, set by the analyser for every diagnostic it
66    /// produces. `lex-lsp` reads this directly when mapping onto LSP
67    /// wire severities; the kind-to-severity mapping that lived in
68    /// `to_lsp_diagnostic` is no longer authoritative.
69    pub severity: DiagnosticSeverity,
70    pub kind: DiagnosticKind,
71    pub message: String,
72}
73
74/// Run the analyser without an extension registry — equivalent to
75/// running with an empty registry. Provided for callers that haven't
76/// adopted the extension system yet.
77pub fn analyze(document: &Document) -> Vec<AnalysisDiagnostic> {
78    let registry = Registry::new();
79    analyze_with_registry(document, &registry)
80}
81
82/// Run the analyser with a populated extension registry. Labels whose
83/// namespace is registered get pre-validated against their schema and,
84/// if pre-validation passes, dispatched to the handler's `on_validate`
85/// hook. Handler-emitted diagnostics are merged into the same stream as
86/// the built-in checks.
87pub fn analyze_with_registry(document: &Document, registry: &Registry) -> Vec<AnalysisDiagnostic> {
88    let mut diagnostics = Vec::new();
89    check_footnotes(document, &mut diagnostics);
90    check_tables(document, &mut diagnostics);
91    crate::label_dispatch::dispatch_labels(document, registry, &mut diagnostics);
92    diagnostics
93}
94
95fn check_footnotes(document: &Document, diagnostics: &mut Vec<AnalysisDiagnostic>) {
96    // Numbered definitions reachable from outside any table: :: notes ::
97    // annotated lists at document or session scope.
98    let outer_defs: HashSet<u32> = crate::utils::collect_footnote_definitions(document)
99        .into_iter()
100        .filter_map(|(label, _)| label.parse::<u32>().ok())
101        .collect();
102
103    // References outside tables resolve to `outer_defs`; references inside a
104    // table resolve first to that table's own positional footnote list
105    // (`table.footnotes`) and then fall back to `outer_defs`.
106    if let Some(title) = &document.title {
107        check_text(&title.content, &outer_defs, diagnostics);
108    }
109    for annotation in document.annotations() {
110        check_annotation(annotation, &outer_defs, diagnostics);
111    }
112    check_session(&document.root, &outer_defs, diagnostics);
113}
114
115fn check_session(
116    session: &Session,
117    defs: &HashSet<u32>,
118    diagnostics: &mut Vec<AnalysisDiagnostic>,
119) {
120    check_text(&session.title, defs, diagnostics);
121    for annotation in session.annotations() {
122        check_annotation(annotation, defs, diagnostics);
123    }
124    for child in session.children.iter() {
125        check_content(child, defs, diagnostics);
126    }
127}
128
129fn check_content(
130    item: &ContentItem,
131    defs: &HashSet<u32>,
132    diagnostics: &mut Vec<AnalysisDiagnostic>,
133) {
134    match item {
135        ContentItem::Paragraph(p) => {
136            for line in &p.lines {
137                if let ContentItem::TextLine(tl) = line {
138                    check_text(&tl.content, defs, diagnostics);
139                }
140            }
141            for annotation in p.annotations() {
142                check_annotation(annotation, defs, diagnostics);
143            }
144        }
145        ContentItem::Session(s) => check_session(s, defs, diagnostics),
146        ContentItem::List(list) => {
147            for annotation in list.annotations() {
148                check_annotation(annotation, defs, diagnostics);
149            }
150            for entry in &list.items {
151                if let ContentItem::ListItem(li) = entry {
152                    for text in &li.text {
153                        check_text(text, defs, diagnostics);
154                    }
155                    for annotation in li.annotations() {
156                        check_annotation(annotation, defs, diagnostics);
157                    }
158                    for child in li.children.iter() {
159                        check_content(child, defs, diagnostics);
160                    }
161                }
162            }
163        }
164        ContentItem::Definition(def) => {
165            check_text(&def.subject, defs, diagnostics);
166            for annotation in def.annotations() {
167                check_annotation(annotation, defs, diagnostics);
168            }
169            for child in def.children.iter() {
170                check_content(child, defs, diagnostics);
171            }
172        }
173        ContentItem::Annotation(a) => check_annotation(a, defs, diagnostics),
174        ContentItem::VerbatimBlock(v) => {
175            check_text(&v.subject, defs, diagnostics);
176            for annotation in v.annotations() {
177                check_annotation(annotation, defs, diagnostics);
178            }
179        }
180        ContentItem::Table(table) => check_table(table, defs, diagnostics),
181        _ => {}
182    }
183}
184
185fn check_annotation(
186    annotation: &Annotation,
187    defs: &HashSet<u32>,
188    diagnostics: &mut Vec<AnalysisDiagnostic>,
189) {
190    for child in annotation.children.iter() {
191        check_content(child, defs, diagnostics);
192    }
193}
194
195fn check_table(
196    table: &Table,
197    outer_defs: &HashSet<u32>,
198    diagnostics: &mut Vec<AnalysisDiagnostic>,
199) {
200    // Extend the in-scope definitions with the table's positional footnote
201    // list. The table's own numbered items shadow nothing — they just add
202    // table-local numbers that references inside this table may resolve to.
203    // Fast path: most tables have no footnotes, so reuse `outer_defs` rather
204    // than cloning it into a new `HashSet` for every such table.
205    let table_defs = table_footnote_numbers(table);
206    if table_defs.is_empty() {
207        check_table_text(table, outer_defs, diagnostics);
208        return;
209    }
210    let mut scope = outer_defs.clone();
211    scope.extend(table_defs);
212    check_table_text(table, &scope, diagnostics);
213}
214
215fn check_table_text(table: &Table, defs: &HashSet<u32>, diagnostics: &mut Vec<AnalysisDiagnostic>) {
216    check_text(&table.subject, defs, diagnostics);
217    for row in table.all_rows() {
218        for cell in &row.cells {
219            check_text(&cell.content, defs, diagnostics);
220        }
221    }
222    for annotation in table.annotations() {
223        check_annotation(annotation, defs, diagnostics);
224    }
225}
226
227fn table_footnote_numbers(table: &Table) -> HashSet<u32> {
228    let Some(list) = &table.footnotes else {
229        return HashSet::new();
230    };
231    let mut numbers = HashSet::new();
232    for entry in &list.items {
233        if let ContentItem::ListItem(li) = entry {
234            let label = li
235                .marker()
236                .trim()
237                .trim_end_matches(['.', ')', ':'].as_ref())
238                .trim();
239            if let Ok(n) = label.parse::<u32>() {
240                numbers.insert(n);
241            }
242        }
243    }
244    numbers
245}
246
247fn check_text(text: &TextContent, defs: &HashSet<u32>, diagnostics: &mut Vec<AnalysisDiagnostic>) {
248    for reference in extract_references(text) {
249        if let ReferenceType::FootnoteNumber { number } = reference.reference_type {
250            if !defs.contains(&number) {
251                diagnostics.push(AnalysisDiagnostic {
252                    range: reference.range,
253                    severity: DiagnosticSeverity::Error,
254                    kind: DiagnosticKind::MissingFootnoteDefinition,
255                    message: format!(
256                        "Footnote [{number}] has no matching footnote definition in scope"
257                    ),
258                });
259            }
260        }
261    }
262}
263
264fn check_tables(document: &Document, diagnostics: &mut Vec<AnalysisDiagnostic>) {
265    visit_tables_in_session(&document.root, diagnostics);
266}
267
268fn visit_tables_in_session(session: &Session, diagnostics: &mut Vec<AnalysisDiagnostic>) {
269    for child in session.children.iter() {
270        visit_tables_in_content(child, diagnostics);
271    }
272}
273
274fn visit_tables_in_content(item: &ContentItem, diagnostics: &mut Vec<AnalysisDiagnostic>) {
275    match item {
276        ContentItem::Table(table) => check_table_columns(table, diagnostics),
277        ContentItem::Session(session) => visit_tables_in_session(session, diagnostics),
278        ContentItem::Definition(def) => {
279            for child in def.children.iter() {
280                visit_tables_in_content(child, diagnostics);
281            }
282        }
283        ContentItem::List(list) => {
284            for entry in &list.items {
285                if let ContentItem::ListItem(li) = entry {
286                    for child in li.children.iter() {
287                        visit_tables_in_content(child, diagnostics);
288                    }
289                }
290            }
291        }
292        ContentItem::Annotation(ann) => {
293            for child in ann.children.iter() {
294                visit_tables_in_content(child, diagnostics);
295            }
296        }
297        _ => {}
298    }
299}
300
301/// Check that all rows in a table have the same effective column count.
302///
303/// The effective width of a row accounts for both colspans of its own cells
304/// and rowspan carry-over from cells in prior rows that extend into it.
305/// Rows with different effective widths indicate a structural error (missing
306/// or extra cells).
307fn check_table_columns(table: &Table, diagnostics: &mut Vec<AnalysisDiagnostic>) {
308    let rows: Vec<_> = table.all_rows().collect();
309    if rows.len() < 2 {
310        return;
311    }
312
313    let widths = compute_row_widths(&rows);
314    let expected = widths[0];
315    for (i, &width) in widths.iter().enumerate().skip(1) {
316        if width != expected {
317            diagnostics.push(AnalysisDiagnostic {
318                range: rows[i].location.clone(),
319                severity: DiagnosticSeverity::Warning,
320                kind: DiagnosticKind::TableInconsistentColumns,
321                message: format!(
322                    "Row has {width} columns, expected {expected} (matching first row)"
323                ),
324            });
325        }
326    }
327}
328
329/// Simulate the virtual table grid to compute each row's effective width.
330///
331/// `carry[col]` tracks how many more rows (including the current one) a cell
332/// placed in a prior row still occupies column `col`. Own cells skip columns
333/// where `carry[col] > 0` (those are held by a cell from above via rowspan).
334fn compute_row_widths(rows: &[&TableRow]) -> Vec<usize> {
335    let mut carry: Vec<usize> = Vec::new();
336    let mut widths = Vec::with_capacity(rows.len());
337
338    for row in rows {
339        let mut col = 0;
340        for cell in &row.cells {
341            while col < carry.len() && carry[col] > 0 {
342                col += 1;
343            }
344            let end = col + cell.colspan;
345            if end > carry.len() {
346                carry.resize(end, 0);
347            }
348            for slot in carry.iter_mut().take(end).skip(col) {
349                *slot = cell.rowspan;
350            }
351            col = end;
352        }
353
354        let width = carry
355            .iter()
356            .rposition(|&r| r > 0)
357            .map(|i| i + 1)
358            .unwrap_or(0);
359        widths.push(width);
360
361        // Columns at or beyond `width` are guaranteed 0 (that's how width is
362        // defined), so limit the decrement to the active range and drop the
363        // trailing zeros to keep `carry` proportional to the live grid.
364        for c in carry.iter_mut().take(width) {
365            if *c > 0 {
366                *c -= 1;
367            }
368        }
369        carry.truncate(width);
370    }
371
372    widths
373}
374
#[cfg(test)]
mod tests {
    use super::*;
    use lex_core::lex::testing::lexplore::Lexplore;

    // ---- helpers -------------------------------------------------------

    /// Analyses `doc` and keeps only the diagnostics of the given kind,
    /// in the order the analyser produced them.
    fn diags_of_kind(doc: &Document, wanted: DiagnosticKind) -> Vec<AnalysisDiagnostic> {
        let mut matching = analyze(doc);
        matching.retain(|diag| diag.kind == wanted);
        matching
    }

    fn footnote_diags(doc: &Document) -> Vec<AnalysisDiagnostic> {
        diags_of_kind(doc, DiagnosticKind::MissingFootnoteDefinition)
    }

    fn table_diags(doc: &Document) -> Vec<AnalysisDiagnostic> {
        diags_of_kind(doc, DiagnosticKind::TableInconsistentColumns)
    }

    // ---- footnote definitions -----------------------------------------

    #[test]
    fn detects_missing_footnote_definition() {
        // Looks at the unfiltered output: exactly one diagnostic is expected.
        let doc = Lexplore::footnotes(1).parse().unwrap();
        let diags = analyze(&doc);
        assert_eq!(diags.len(), 1);
        assert_eq!(diags[0].kind, DiagnosticKind::MissingFootnoteDefinition);
    }

    #[test]
    fn ignores_valid_footnote_with_notes_annotation() {
        // The definitions come from a :: notes :: annotated list at the
        // document root.
        let doc = Lexplore::footnotes(2).parse().unwrap();
        let diags = footnote_diags(&doc);
        assert!(diags.is_empty());
    }

    #[test]
    fn ignores_valid_list_footnote_in_session() {
        // Same, but the :: notes :: list lives inside a session.
        let doc = Lexplore::footnotes(3).parse().unwrap();
        let diags = footnote_diags(&doc);
        assert!(diags.is_empty());
    }

    #[test]
    fn list_without_notes_annotation_is_not_footnotes() {
        // A session merely titled "Notes", without :: notes ::, defines no
        // footnotes.
        let doc = Lexplore::footnotes(4).parse().unwrap();
        let diags = footnote_diags(&doc);
        assert_eq!(diags.len(), 1);
    }

    // ---- table column consistency -------------------------------------

    #[test]
    fn detects_inconsistent_table_columns() {
        // table-13: header with 3 columns, then a 2-column row, then a
        // 3-column row — the middle row is short.
        let doc = Lexplore::table(13).parse().unwrap();
        let diags = table_diags(&doc);
        assert_eq!(diags.len(), 1);
        let message = &diags[0].message;
        assert!(message.contains("2 columns"));
        assert!(message.contains("expected 3"));
    }

    #[test]
    fn consistent_table_no_diagnostic() {
        // table-01: minimal 2-column table whose rows all agree.
        let doc = Lexplore::table(1).parse().unwrap();
        let diags = table_diags(&doc);
        assert!(diags.is_empty());
    }

    #[test]
    fn table_with_rowspan_counts_carry_over() {
        // table-17: ^^ rowspans — effective widths stay equal across rows.
        let doc = Lexplore::table(17).parse().unwrap();
        let diags = table_diags(&doc);
        assert!(
            diags.is_empty(),
            "rowspan carry-over should not trigger inconsistent-columns, got: {diags:?}"
        );
    }

    #[test]
    fn table_with_colspan_and_rowspan_mixed() {
        // table-18: >> colspans combined with ^^ rowspans; effective widths
        // stay equal across rows.
        let doc = Lexplore::table(18).parse().unwrap();
        let diags = table_diags(&doc);
        assert!(
            diags.is_empty(),
            "mixed colspan/rowspan should not trigger inconsistent-columns, got: {diags:?}"
        );
    }

    #[test]
    fn table_with_colspan_counts_effective_width() {
        // table-04: >> colspans count toward the effective width; every row
        // agrees.
        let doc = Lexplore::table(4).parse().unwrap();
        let diags = table_diags(&doc);
        assert!(diags.is_empty());
    }

    // ---- footnotes and tables together --------------------------------

    #[test]
    fn footnote_ref_in_table_cell_is_checked() {
        // footnotes-09: a cell contains [1], and no definition exists in any
        // scope — document, session, or table-local.
        let doc = Lexplore::footnotes(9).parse().unwrap();
        let diags = footnote_diags(&doc);
        assert_eq!(diags.len(), 1);
        assert!(diags[0].message.contains("[1]"));
    }

    #[test]
    fn table_scoped_footnotes_resolve_cell_refs() {
        // footnotes-11: cell refs [1] and [2] are satisfied by the table's
        // own positional footnote list; no :: notes :: annotation is needed.
        let doc = Lexplore::footnotes(11).parse().unwrap();
        let diags = footnote_diags(&doc);
        assert!(
            diags.is_empty(),
            "table-scoped cell refs should resolve to table.footnotes, got: {diags:?}"
        );
    }

    #[test]
    fn table_scoped_footnotes_do_not_leak_out() {
        // footnotes-12: the table's footnote list is table-local, so a [1]
        // in body text outside the table must stay unresolved even though
        // the numbers match.
        let doc = Lexplore::footnotes(12).parse().unwrap();
        let diags = footnote_diags(&doc);
        assert_eq!(
            diags.len(),
            1,
            "only the paragraph ref [1] should be unresolved, got: {diags:?}"
        );
        assert!(diags[0].message.contains("[1]"));
    }
}