// lex_analysis/diagnostics.rs

1use crate::inline::extract_references;
2use lex_core::lex::ast::{
3    Annotation, ContentItem, Document, Range, Session, Table, TableRow, TextContent,
4};
5use lex_core::lex::inlines::ReferenceType;
6use std::collections::HashSet;
7
/// Category of a finding reported by [`analyze`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DiagnosticKind {
    /// A numbered footnote reference has no matching footnote definition in
    /// scope.
    MissingFootnoteDefinition,
    /// Presumably a footnote definition that is never referenced.
    ///
    /// NOTE(review): no check visible in this file emits this variant —
    /// confirm whether it is produced elsewhere or still to be implemented.
    UnusedFootnoteDefinition,
    /// A table row whose effective column count differs from the first row's.
    TableInconsistentColumns,
}
14
/// A single finding produced by [`analyze`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AnalysisDiagnostic {
    /// Source range the finding points at: the offending footnote reference
    /// or the offending table row.
    pub range: Range,
    /// Category of the finding.
    pub kind: DiagnosticKind,
    /// Human-readable description of the finding.
    pub message: String,
}
21
22pub fn analyze(document: &Document) -> Vec<AnalysisDiagnostic> {
23    let mut diagnostics = Vec::new();
24    check_footnotes(document, &mut diagnostics);
25    check_tables(document, &mut diagnostics);
26    diagnostics
27}
28
29fn check_footnotes(document: &Document, diagnostics: &mut Vec<AnalysisDiagnostic>) {
30    // Numbered definitions reachable from outside any table: :: notes ::
31    // annotated lists at document or session scope.
32    let outer_defs: HashSet<u32> = crate::utils::collect_footnote_definitions(document)
33        .into_iter()
34        .filter_map(|(label, _)| label.parse::<u32>().ok())
35        .collect();
36
37    // References outside tables resolve to `outer_defs`; references inside a
38    // table resolve first to that table's own positional footnote list
39    // (`table.footnotes`) and then fall back to `outer_defs`.
40    if let Some(title) = &document.title {
41        check_text(&title.content, &outer_defs, diagnostics);
42    }
43    for annotation in document.annotations() {
44        check_annotation(annotation, &outer_defs, diagnostics);
45    }
46    check_session(&document.root, &outer_defs, diagnostics);
47}
48
49fn check_session(
50    session: &Session,
51    defs: &HashSet<u32>,
52    diagnostics: &mut Vec<AnalysisDiagnostic>,
53) {
54    check_text(&session.title, defs, diagnostics);
55    for annotation in session.annotations() {
56        check_annotation(annotation, defs, diagnostics);
57    }
58    for child in session.children.iter() {
59        check_content(child, defs, diagnostics);
60    }
61}
62
63fn check_content(
64    item: &ContentItem,
65    defs: &HashSet<u32>,
66    diagnostics: &mut Vec<AnalysisDiagnostic>,
67) {
68    match item {
69        ContentItem::Paragraph(p) => {
70            for line in &p.lines {
71                if let ContentItem::TextLine(tl) = line {
72                    check_text(&tl.content, defs, diagnostics);
73                }
74            }
75            for annotation in p.annotations() {
76                check_annotation(annotation, defs, diagnostics);
77            }
78        }
79        ContentItem::Session(s) => check_session(s, defs, diagnostics),
80        ContentItem::List(list) => {
81            for annotation in list.annotations() {
82                check_annotation(annotation, defs, diagnostics);
83            }
84            for entry in &list.items {
85                if let ContentItem::ListItem(li) = entry {
86                    for text in &li.text {
87                        check_text(text, defs, diagnostics);
88                    }
89                    for annotation in li.annotations() {
90                        check_annotation(annotation, defs, diagnostics);
91                    }
92                    for child in li.children.iter() {
93                        check_content(child, defs, diagnostics);
94                    }
95                }
96            }
97        }
98        ContentItem::Definition(def) => {
99            check_text(&def.subject, defs, diagnostics);
100            for annotation in def.annotations() {
101                check_annotation(annotation, defs, diagnostics);
102            }
103            for child in def.children.iter() {
104                check_content(child, defs, diagnostics);
105            }
106        }
107        ContentItem::Annotation(a) => check_annotation(a, defs, diagnostics),
108        ContentItem::VerbatimBlock(v) => {
109            check_text(&v.subject, defs, diagnostics);
110            for annotation in v.annotations() {
111                check_annotation(annotation, defs, diagnostics);
112            }
113        }
114        ContentItem::Table(table) => check_table(table, defs, diagnostics),
115        _ => {}
116    }
117}
118
119fn check_annotation(
120    annotation: &Annotation,
121    defs: &HashSet<u32>,
122    diagnostics: &mut Vec<AnalysisDiagnostic>,
123) {
124    for child in annotation.children.iter() {
125        check_content(child, defs, diagnostics);
126    }
127}
128
129fn check_table(
130    table: &Table,
131    outer_defs: &HashSet<u32>,
132    diagnostics: &mut Vec<AnalysisDiagnostic>,
133) {
134    // Extend the in-scope definitions with the table's positional footnote
135    // list. The table's own numbered items shadow nothing — they just add
136    // table-local numbers that references inside this table may resolve to.
137    // Fast path: most tables have no footnotes, so reuse `outer_defs` rather
138    // than cloning it into a new `HashSet` for every such table.
139    let table_defs = table_footnote_numbers(table);
140    if table_defs.is_empty() {
141        check_table_text(table, outer_defs, diagnostics);
142        return;
143    }
144    let mut scope = outer_defs.clone();
145    scope.extend(table_defs);
146    check_table_text(table, &scope, diagnostics);
147}
148
149fn check_table_text(table: &Table, defs: &HashSet<u32>, diagnostics: &mut Vec<AnalysisDiagnostic>) {
150    check_text(&table.subject, defs, diagnostics);
151    for row in table.all_rows() {
152        for cell in &row.cells {
153            check_text(&cell.content, defs, diagnostics);
154        }
155    }
156    for annotation in table.annotations() {
157        check_annotation(annotation, defs, diagnostics);
158    }
159}
160
161fn table_footnote_numbers(table: &Table) -> HashSet<u32> {
162    let Some(list) = &table.footnotes else {
163        return HashSet::new();
164    };
165    let mut numbers = HashSet::new();
166    for entry in &list.items {
167        if let ContentItem::ListItem(li) = entry {
168            let label = li
169                .marker()
170                .trim()
171                .trim_end_matches(['.', ')', ':'].as_ref())
172                .trim();
173            if let Ok(n) = label.parse::<u32>() {
174                numbers.insert(n);
175            }
176        }
177    }
178    numbers
179}
180
181fn check_text(text: &TextContent, defs: &HashSet<u32>, diagnostics: &mut Vec<AnalysisDiagnostic>) {
182    for reference in extract_references(text) {
183        if let ReferenceType::FootnoteNumber { number } = reference.reference_type {
184            if !defs.contains(&number) {
185                diagnostics.push(AnalysisDiagnostic {
186                    range: reference.range,
187                    kind: DiagnosticKind::MissingFootnoteDefinition,
188                    message: format!(
189                        "Footnote [{number}] has no matching footnote definition in scope"
190                    ),
191                });
192            }
193        }
194    }
195}
196
197fn check_tables(document: &Document, diagnostics: &mut Vec<AnalysisDiagnostic>) {
198    visit_tables_in_session(&document.root, diagnostics);
199}
200
201fn visit_tables_in_session(session: &Session, diagnostics: &mut Vec<AnalysisDiagnostic>) {
202    for child in session.children.iter() {
203        visit_tables_in_content(child, diagnostics);
204    }
205}
206
207fn visit_tables_in_content(item: &ContentItem, diagnostics: &mut Vec<AnalysisDiagnostic>) {
208    match item {
209        ContentItem::Table(table) => check_table_columns(table, diagnostics),
210        ContentItem::Session(session) => visit_tables_in_session(session, diagnostics),
211        ContentItem::Definition(def) => {
212            for child in def.children.iter() {
213                visit_tables_in_content(child, diagnostics);
214            }
215        }
216        ContentItem::List(list) => {
217            for entry in &list.items {
218                if let ContentItem::ListItem(li) = entry {
219                    for child in li.children.iter() {
220                        visit_tables_in_content(child, diagnostics);
221                    }
222                }
223            }
224        }
225        ContentItem::Annotation(ann) => {
226            for child in ann.children.iter() {
227                visit_tables_in_content(child, diagnostics);
228            }
229        }
230        _ => {}
231    }
232}
233
234/// Check that all rows in a table have the same effective column count.
235///
236/// The effective width of a row accounts for both colspans of its own cells
237/// and rowspan carry-over from cells in prior rows that extend into it.
238/// Rows with different effective widths indicate a structural error (missing
239/// or extra cells).
240fn check_table_columns(table: &Table, diagnostics: &mut Vec<AnalysisDiagnostic>) {
241    let rows: Vec<_> = table.all_rows().collect();
242    if rows.len() < 2 {
243        return;
244    }
245
246    let widths = compute_row_widths(&rows);
247    let expected = widths[0];
248    for (i, &width) in widths.iter().enumerate().skip(1) {
249        if width != expected {
250            diagnostics.push(AnalysisDiagnostic {
251                range: rows[i].location.clone(),
252                kind: DiagnosticKind::TableInconsistentColumns,
253                message: format!(
254                    "Row has {width} columns, expected {expected} (matching first row)"
255                ),
256            });
257        }
258    }
259}
260
261/// Simulate the virtual table grid to compute each row's effective width.
262///
263/// `carry[col]` tracks how many more rows (including the current one) a cell
264/// placed in a prior row still occupies column `col`. Own cells skip columns
265/// where `carry[col] > 0` (those are held by a cell from above via rowspan).
266fn compute_row_widths(rows: &[&TableRow]) -> Vec<usize> {
267    let mut carry: Vec<usize> = Vec::new();
268    let mut widths = Vec::with_capacity(rows.len());
269
270    for row in rows {
271        let mut col = 0;
272        for cell in &row.cells {
273            while col < carry.len() && carry[col] > 0 {
274                col += 1;
275            }
276            let end = col + cell.colspan;
277            if end > carry.len() {
278                carry.resize(end, 0);
279            }
280            for slot in carry.iter_mut().take(end).skip(col) {
281                *slot = cell.rowspan;
282            }
283            col = end;
284        }
285
286        let width = carry
287            .iter()
288            .rposition(|&r| r > 0)
289            .map(|i| i + 1)
290            .unwrap_or(0);
291        widths.push(width);
292
293        // Columns at or beyond `width` are guaranteed 0 (that's how width is
294        // defined), so limit the decrement to the active range and drop the
295        // trailing zeros to keep `carry` proportional to the live grid.
296        for c in carry.iter_mut().take(width) {
297            if *c > 0 {
298                *c -= 1;
299            }
300        }
301        carry.truncate(width);
302    }
303
304    widths
305}
306
#[cfg(test)]
mod tests {
    use super::*;
    use lex_core::lex::testing::lexplore::Lexplore;

    /// Runs the full analysis and keeps only the diagnostics of `kind`.
    fn diags_of_kind(doc: &Document, kind: DiagnosticKind) -> Vec<AnalysisDiagnostic> {
        let mut all = analyze(doc);
        all.retain(|d| d.kind == kind);
        all
    }

    /// Missing-footnote diagnostics only.
    fn footnote_diags(doc: &Document) -> Vec<AnalysisDiagnostic> {
        diags_of_kind(doc, DiagnosticKind::MissingFootnoteDefinition)
    }

    /// Inconsistent-column diagnostics only.
    fn table_diags(doc: &Document) -> Vec<AnalysisDiagnostic> {
        diags_of_kind(doc, DiagnosticKind::TableInconsistentColumns)
    }

    // ---- footnotes: document and session scope ----

    #[test]
    fn detects_missing_footnote_definition() {
        // Unfiltered on purpose: exactly one diagnostic of any kind.
        let doc = Lexplore::footnotes(1).parse().unwrap();
        let diags = analyze(&doc);
        assert_eq!(diags.len(), 1);
        assert_eq!(diags[0].kind, DiagnosticKind::MissingFootnoteDefinition);
    }

    #[test]
    fn ignores_valid_footnote_with_notes_annotation() {
        // A `:: notes ::` annotated list at the document root supplies the
        // definitions.
        let doc = Lexplore::footnotes(2).parse().unwrap();
        assert!(footnote_diags(&doc).is_empty());
    }

    #[test]
    fn ignores_valid_list_footnote_in_session() {
        // Same, with the `:: notes ::` list inside a session.
        let doc = Lexplore::footnotes(3).parse().unwrap();
        assert!(footnote_diags(&doc).is_empty());
    }

    #[test]
    fn list_without_notes_annotation_is_not_footnotes() {
        // A "Notes" session lacking `:: notes ::` does NOT define footnotes.
        let doc = Lexplore::footnotes(4).parse().unwrap();
        assert_eq!(footnote_diags(&doc).len(), 1);
    }

    // ---- table column consistency ----

    #[test]
    fn detects_inconsistent_table_columns() {
        // table-13: 3-col header, 2-col row, 3-col row — the middle row is short.
        let doc = Lexplore::table(13).parse().unwrap();
        let diags = table_diags(&doc);
        assert_eq!(diags.len(), 1);
        let message = &diags[0].message;
        assert!(message.contains("2 columns"));
        assert!(message.contains("expected 3"));
    }

    #[test]
    fn consistent_table_no_diagnostic() {
        // table-01: minimal 2-column table with consistent rows.
        let doc = Lexplore::table(1).parse().unwrap();
        assert!(table_diags(&doc).is_empty());
    }

    #[test]
    fn table_with_rowspan_counts_carry_over() {
        // table-17: rowspan via ^^ keeps the effective widths consistent.
        let doc = Lexplore::table(17).parse().unwrap();
        let diags = table_diags(&doc);
        assert!(
            diags.is_empty(),
            "rowspan carry-over should not trigger inconsistent-columns, got: {diags:?}"
        );
    }

    #[test]
    fn table_with_colspan_and_rowspan_mixed() {
        // table-18: >> colspan combined with ^^ rowspan; widths stay consistent.
        let doc = Lexplore::table(18).parse().unwrap();
        let diags = table_diags(&doc);
        assert!(
            diags.is_empty(),
            "mixed colspan/rowspan should not trigger inconsistent-columns, got: {diags:?}"
        );
    }

    #[test]
    fn table_with_colspan_counts_effective_width() {
        // table-04: colspan via >> counts toward the effective width.
        let doc = Lexplore::table(4).parse().unwrap();
        assert!(table_diags(&doc).is_empty());
    }

    // ---- footnotes: table scope ----

    #[test]
    fn footnote_ref_in_table_cell_is_checked() {
        // footnotes-09: a cell contains [1] and no definition exists at
        // document, session, or table-local scope.
        let doc = Lexplore::footnotes(9).parse().unwrap();
        let diags = footnote_diags(&doc);
        assert_eq!(diags.len(), 1);
        assert!(diags[0].message.contains("[1]"));
    }

    #[test]
    fn table_scoped_footnotes_resolve_cell_refs() {
        // footnotes-11: cell refs [1] and [2] resolve to the table's own
        // positional footnote list; no `:: notes ::` annotation is needed.
        let doc = Lexplore::footnotes(11).parse().unwrap();
        let diags = footnote_diags(&doc);
        assert!(
            diags.is_empty(),
            "table-scoped cell refs should resolve to table.footnotes, got: {diags:?}"
        );
    }

    #[test]
    fn table_scoped_footnotes_do_not_leak_out() {
        // footnotes-12: a [1] ref in body text outside the table must NOT
        // resolve to the table's positional footnote list, even though the
        // numbers match. That list is local to the table.
        let doc = Lexplore::footnotes(12).parse().unwrap();
        let diags = footnote_diags(&doc);
        assert_eq!(
            diags.len(),
            1,
            "only the paragraph ref [1] should be unresolved, got: {diags:?}"
        );
        assert!(diags[0].message.contains("[1]"));
    }
}