//! lex_analysis/diagnostics.rs
//!
//! Document diagnostics: missing footnote definitions and inconsistent table column counts.

1use crate::inline::extract_references;
2use crate::utils::for_each_text_content;
3use lex_core::lex::ast::{ContentItem, Document, Range, Session, Table, TableRow};
4use lex_core::lex::inlines::ReferenceType;
5
/// Category of problem reported by the analysis passes in this file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DiagnosticKind {
    /// A numbered footnote reference (e.g. `[1]`) has no matching numeric
    /// label among the collected footnote definitions.
    MissingFootnoteDefinition,
    /// NOTE(review): not emitted by any check visible in this file; presumably
    /// meant for footnote definitions that are never referenced — confirm.
    UnusedFootnoteDefinition,
    /// A table row's effective column count differs from the first row's.
    TableInconsistentColumns,
}
12
/// A single finding produced by [`analyze`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AnalysisDiagnostic {
    /// Source range the finding points at: the footnote reference's range, or
    /// the offending table row's location.
    pub range: Range,
    /// Which check produced the finding.
    pub kind: DiagnosticKind,
    /// Human-readable description of the problem.
    pub message: String,
}
19
20pub fn analyze(document: &Document) -> Vec<AnalysisDiagnostic> {
21    let mut diagnostics = Vec::new();
22    check_footnotes(document, &mut diagnostics);
23    check_tables(document, &mut diagnostics);
24    diagnostics
25}
26
27fn check_footnotes(document: &Document, diagnostics: &mut Vec<AnalysisDiagnostic>) {
28    // 1. Collect all numbered footnote references
29    let mut numbered_refs = Vec::new();
30    for_each_text_content(document, &mut |text| {
31        for reference in extract_references(text) {
32            if let ReferenceType::FootnoteNumber { number } = reference.reference_type {
33                numbered_refs.push((number, reference.range));
34            }
35        }
36    });
37
38    // 2. Collect footnote definitions from :: notes ::-annotated lists
39    let definitions_list = crate::utils::collect_footnote_definitions(document);
40    let mut numeric_definitions = std::collections::HashSet::new();
41    for (label, _) in &definitions_list {
42        if let Ok(number) = label.parse::<u32>() {
43            numeric_definitions.insert(number);
44        }
45    }
46
47    // 3. Check for missing definitions
48    for (number, range) in &numbered_refs {
49        if !numeric_definitions.contains(number) {
50            diagnostics.push(AnalysisDiagnostic {
51                range: range.clone(),
52                kind: DiagnosticKind::MissingFootnoteDefinition,
53                message: format!("Footnote [{number}] has no matching item in a :: notes :: list"),
54            });
55        }
56    }
57}
58
59fn check_tables(document: &Document, diagnostics: &mut Vec<AnalysisDiagnostic>) {
60    visit_tables_in_session(&document.root, diagnostics);
61}
62
63fn visit_tables_in_session(session: &Session, diagnostics: &mut Vec<AnalysisDiagnostic>) {
64    for child in session.children.iter() {
65        visit_tables_in_content(child, diagnostics);
66    }
67}
68
69fn visit_tables_in_content(item: &ContentItem, diagnostics: &mut Vec<AnalysisDiagnostic>) {
70    match item {
71        ContentItem::Table(table) => check_table_columns(table, diagnostics),
72        ContentItem::Session(session) => visit_tables_in_session(session, diagnostics),
73        ContentItem::Definition(def) => {
74            for child in def.children.iter() {
75                visit_tables_in_content(child, diagnostics);
76            }
77        }
78        ContentItem::List(list) => {
79            for entry in &list.items {
80                if let ContentItem::ListItem(li) = entry {
81                    for child in li.children.iter() {
82                        visit_tables_in_content(child, diagnostics);
83                    }
84                }
85            }
86        }
87        ContentItem::Annotation(ann) => {
88            for child in ann.children.iter() {
89                visit_tables_in_content(child, diagnostics);
90            }
91        }
92        _ => {}
93    }
94}
95
96/// Check that all rows in a table have the same effective column count.
97///
98/// The effective width of a row accounts for both colspans of its own cells
99/// and rowspan carry-over from cells in prior rows that extend into it.
100/// Rows with different effective widths indicate a structural error (missing
101/// or extra cells).
102fn check_table_columns(table: &Table, diagnostics: &mut Vec<AnalysisDiagnostic>) {
103    let rows: Vec<_> = table.all_rows().collect();
104    if rows.len() < 2 {
105        return;
106    }
107
108    let widths = compute_row_widths(&rows);
109    let expected = widths[0];
110    for (i, &width) in widths.iter().enumerate().skip(1) {
111        if width != expected {
112            diagnostics.push(AnalysisDiagnostic {
113                range: rows[i].location.clone(),
114                kind: DiagnosticKind::TableInconsistentColumns,
115                message: format!(
116                    "Row has {width} columns, expected {expected} (matching first row)"
117                ),
118            });
119        }
120    }
121}
122
123/// Simulate the virtual table grid to compute each row's effective width.
124///
125/// `carry[col]` tracks how many more rows (including the current one) a cell
126/// placed in a prior row still occupies column `col`. Own cells skip columns
127/// where `carry[col] > 0` (those are held by a cell from above via rowspan).
128fn compute_row_widths(rows: &[&TableRow]) -> Vec<usize> {
129    let mut carry: Vec<usize> = Vec::new();
130    let mut widths = Vec::with_capacity(rows.len());
131
132    for row in rows {
133        let mut col = 0;
134        for cell in &row.cells {
135            while col < carry.len() && carry[col] > 0 {
136                col += 1;
137            }
138            let end = col + cell.colspan;
139            if end > carry.len() {
140                carry.resize(end, 0);
141            }
142            for slot in carry.iter_mut().take(end).skip(col) {
143                *slot = cell.rowspan;
144            }
145            col = end;
146        }
147
148        let width = carry
149            .iter()
150            .rposition(|&r| r > 0)
151            .map(|i| i + 1)
152            .unwrap_or(0);
153        widths.push(width);
154
155        // Columns at or beyond `width` are guaranteed 0 (that's how width is
156        // defined), so limit the decrement to the active range and drop the
157        // trailing zeros to keep `carry` proportional to the live grid.
158        for c in carry.iter_mut().take(width) {
159            if *c > 0 {
160                *c -= 1;
161            }
162        }
163        carry.truncate(width);
164    }
165
166    widths
167}
168
#[cfg(test)]
mod tests {
    use super::*;
    use lex_core::lex::parsing;

    /// Parses `source` into a document, panicking if parsing fails.
    fn parse(source: &str) -> Document {
        parsing::parse_document(source).expect("parse failed")
    }

    /// Analyzes `source` and keeps only the diagnostics of the given `kind`.
    fn diagnostics_of_kind(source: &str, kind: DiagnosticKind) -> Vec<AnalysisDiagnostic> {
        analyze(&parse(source))
            .into_iter()
            .filter(|diagnostic| diagnostic.kind == kind)
            .collect()
    }

    #[test]
    fn detects_missing_footnote_definition() {
        let diags = analyze(&parse("Text with [1] reference."));
        assert_eq!(diags.len(), 1);
        assert_eq!(diags[0].kind, DiagnosticKind::MissingFootnoteDefinition);
    }

    #[test]
    fn ignores_valid_footnote_with_notes_annotation() {
        // The list annotated with :: notes :: supplies the definitions.
        let footnote_diags = diagnostics_of_kind(
            "Text [1].\n\n:: notes ::\n1. Note.\n2. Another.\n",
            DiagnosticKind::MissingFootnoteDefinition,
        );
        assert!(footnote_diags.is_empty());
    }

    #[test]
    fn ignores_valid_list_footnote_in_session() {
        // Same as above, but the :: notes :: list lives inside a session.
        let footnote_diags = diagnostics_of_kind(
            "Text [1].\n\nNotes\n\n    :: notes ::\n\n    1. Note.\n    2. Another.\n",
            DiagnosticKind::MissingFootnoteDefinition,
        );
        assert!(footnote_diags.is_empty());
    }

    #[test]
    fn list_without_notes_annotation_is_not_footnotes() {
        // A session titled "Notes" is not enough: without :: notes :: its
        // list does not define footnotes.
        let footnote_diags = diagnostics_of_kind(
            "Text [1].\n\nNotes\n\n    1. Note.\n    2. Another.\n",
            DiagnosticKind::MissingFootnoteDefinition,
        );
        assert_eq!(footnote_diags.len(), 1);
    }

    #[test]
    fn detects_inconsistent_table_columns() {
        let table_diags = diagnostics_of_kind(
            "Data:\n    | A | B | C |\n    | 1 | 2 |\n:: table ::\n",
            DiagnosticKind::TableInconsistentColumns,
        );
        assert_eq!(table_diags.len(), 1);
        assert!(table_diags[0].message.contains("2 columns"));
        assert!(table_diags[0].message.contains("expected 3"));
    }

    #[test]
    fn consistent_table_no_diagnostic() {
        let table_diags = diagnostics_of_kind(
            "Data:\n    | A | B |\n    | 1 | 2 |\n:: table ::\n",
            DiagnosticKind::TableInconsistentColumns,
        );
        assert!(table_diags.is_empty());
    }

    #[test]
    fn table_with_rowspan_counts_carry_over() {
        // Row 0: A | B | C    → three single-width cells, effective width 3.
        // Row 1: D | ^^ | E   → ^^ merges into B (B gets rowspan=2), so row 1
        //                       holds only [D, E]; B's carried column keeps
        //                       its effective width at 3.
        let table_diags = diagnostics_of_kind(
            "Data:\n    | A | B  | C |\n    | D | ^^ | E |\n:: table ::\n",
            DiagnosticKind::TableInconsistentColumns,
        );
        assert!(
            table_diags.is_empty(),
            "rowspan carry-over should not trigger inconsistent-columns, got: {table_diags:?}"
        );
    }

    #[test]
    fn table_with_colspan_and_rowspan_mixed() {
        // Follows the "Conference Schedule" table in
        // benchmark/080-gentle-introduction.lex:
        //   | Time  | Room A          | Room B     |
        //   | 9:00  | Opening Keynote | >>         |   (Opening Keynote colspan=2)
        //   | 10:00 | Workshop        | Panel      |   (Workshop rowspan=2, via ^^ below)
        //   | 11:00 | ^^              | Discussion |
        let table_diags = diagnostics_of_kind(
            "Data:\n    | Time  | Room A          | Room B     |\n    | 9:00  | Opening Keynote | >>         |\n    | 10:00 | Workshop        | Panel      |\n    | 11:00 | ^^              | Discussion |\n:: table ::\n",
            DiagnosticKind::TableInconsistentColumns,
        );
        assert!(
            table_diags.is_empty(),
            "mixed colspan/rowspan should not trigger inconsistent-columns, got: {table_diags:?}"
        );
    }

    #[test]
    fn table_with_colspan_counts_effective_width() {
        // Row 1: A followed by >> resolves to one cell with colspan=2.
        // Row 2: B and C are two cells with colspan=1 each.
        // Both rows have an effective width of 2, so they are consistent.
        let table_diags = diagnostics_of_kind(
            "Data:\n    | A  | >> |\n    | B  | C  |\n:: table ::\n",
            DiagnosticKind::TableInconsistentColumns,
        );
        assert!(table_diags.is_empty());
    }

    #[test]
    fn footnote_ref_in_table_cell_is_checked() {
        // A cell contains [1], yet nothing in the document defines it.
        let footnote_diags = diagnostics_of_kind(
            "Data:\n    | Item  | Note |\n    | Alpha | [1]  |\n:: table ::\n",
            DiagnosticKind::MissingFootnoteDefinition,
        );
        assert_eq!(footnote_diags.len(), 1);
        assert!(footnote_diags[0].message.contains("[1]"));
    }
}