Skip to main content

aver/diagnostics/
classify.rs

1//! Classifiers and source-extraction helpers shared by factories.
2//!
3//! All functions are pure (no IO, no globals) and runtime-neutral.
4
5use super::model::{AnnotatedRegion, SourceLine};
6
7// ─── Source extraction ───────────────────────────────────────────────────────
8
9/// Extract source lines around `line` (1-based) with `context` lines of
10/// surrounding context on each side.
11pub(crate) fn extract_source_lines(source: &str, line: usize, context: usize) -> Vec<SourceLine> {
12    let lines: Vec<&str> = source.lines().collect();
13    let start = line.saturating_sub(context + 1);
14    let end = (line + context).min(lines.len());
15    (start..end)
16        .map(|i| SourceLine {
17            line_num: i + 1,
18            text: lines[i].to_string(),
19        })
20        .collect()
21}
22
23/// Extract source lines for an inclusive range [from..to] (1-based).
24pub(crate) fn extract_source_lines_range(source: &str, from: usize, to: usize) -> Vec<SourceLine> {
25    let lines: Vec<&str> = source.lines().collect();
26    let start = from.saturating_sub(1);
27    let end = to.min(lines.len());
28    (start..end)
29        .map(|i| SourceLine {
30            line_num: i + 1,
31            text: lines[i].to_string(),
32        })
33        .collect()
34}
35
36/// Extract the declared return type from a "declared return type is X" message.
37pub(crate) fn extract_return_type(msg: &str) -> &str {
38    msg.rsplit("declared return type is ").next().unwrap_or("?")
39}
40
41/// Estimate how many characters to underline starting at `col` (1-based,
42/// counted in Unicode scalar values).
43///
44/// Byte-indexing into `line` mis-measures multi-byte characters (em-dash,
45/// CJK, emoji). We iterate `chars()` so `col` lines up with what the
46/// parser emits (char-based) and what frontends render (1 glyph per
47/// scalar).
48pub(crate) fn estimate_span_len(line: &str, col: usize) -> usize {
49    let start = col.saturating_sub(1);
50    let len = line
51        .chars()
52        .skip(start)
53        .take_while(|c| !c.is_whitespace() && !matches!(c, '(' | ')' | '[' | ']' | ',' | ':'))
54        .count();
55    if len == 0 { 1 } else { len }
56}
57
58/// Fill small gaps (≤2 lines) between sorted regions with bridging source
59/// lines. Avoids a `...` separator for tiny jumps.
60pub(crate) fn fill_small_region_gaps(regions: &mut Vec<AnnotatedRegion>, source: &str) {
61    if regions.len() < 2 {
62        return;
63    }
64    let lines: Vec<&str> = source.lines().collect();
65    let mut i = 0;
66    while i + 1 < regions.len() {
67        let last_of_prev = regions[i]
68            .source_lines
69            .last()
70            .map(|sl| sl.line_num)
71            .unwrap_or(0);
72        let first_of_next = regions[i + 1]
73            .source_lines
74            .first()
75            .map(|sl| sl.line_num)
76            .unwrap_or(0);
77        if first_of_next > last_of_prev + 1 && first_of_next <= last_of_prev + 3 {
78            let bridge: Vec<SourceLine> = ((last_of_prev + 1)..first_of_next)
79                .filter_map(|ln| {
80                    lines.get(ln.saturating_sub(1)).map(|t| SourceLine {
81                        line_num: ln,
82                        text: t.to_string(),
83                    })
84                })
85                .collect();
86            if !bridge.is_empty() {
87                regions.insert(
88                    i + 1,
89                    AnnotatedRegion {
90                        source_lines: bridge,
91                        underline: None,
92                    },
93                );
94                i += 1;
95            }
96        }
97        i += 1;
98    }
99}
100
101/// Find the line number of the block header (`fn`, `verify`, `decision`) for
102/// `name`, searching forward from the start up to (but not including)
103/// `before_line`.
104pub(crate) fn find_block_header_line(
105    source: &str,
106    name: &str,
107    before_line: usize,
108) -> Option<usize> {
109    let needles = [
110        format!("fn {}", name),
111        format!("verify {}", name),
112        format!("decision {}", name),
113    ];
114    let mut best: Option<usize> = None;
115    for (i, line) in source.lines().enumerate() {
116        let line_num = i + 1;
117        if line_num >= before_line {
118            break;
119        }
120        let trimmed = line.trim_start();
121        for needle in &needles {
122            if trimmed.starts_with(needle.as_str()) {
123                best = Some(line_num);
124            }
125        }
126    }
127    best
128}
129
130/// Find where the block preamble ends. Returns the last preamble line
131/// number (1-based), capped before `before_line`.
132pub(crate) fn find_preamble_end(source: &str, header_line: usize, before_line: usize) -> usize {
133    let mut end = header_line;
134    for (i, line) in source.lines().enumerate() {
135        let line_num = i + 1;
136        if line_num <= header_line {
137            continue;
138        }
139        if line_num >= before_line {
140            break;
141        }
142        let trimmed = line.trim_start();
143        if trimmed.starts_with('?')
144            || trimmed.starts_with('!')
145            || trimmed.starts_with('"')
146            || trimmed.starts_with('[')
147            || trimmed.is_empty()
148            || (line.starts_with("    ") && trimmed.contains(" = "))
149        {
150            end = line_num;
151        } else {
152            break;
153        }
154    }
155    end
156}
157
158/// Try to find a precise (col, len) span by extracting the first quoted
159/// expression from `summary` and locating it in `source_line`.
160/// If `summary` mentions "right side" or `=>`, searches after `=>` in the line.
161/// Returns 1-based col.
162pub(crate) fn find_precise_span(source_line: &str, summary: &str) -> Option<(usize, usize)> {
163    let search_after_arrow = summary.contains("right side") || summary.contains("=>");
164    for quote in ['`', '\''] {
165        if let Some(start_offset) = summary.find(quote) {
166            let start = start_offset + 1;
167            if let Some(end_offset) = summary[start..].find(quote) {
168                let needle = &summary[start..start + end_offset];
169                if !needle.is_empty() {
170                    let search_region = if search_after_arrow {
171                        source_line
172                            .find("=>")
173                            .map(|arrow_pos| arrow_pos + 2)
174                            .unwrap_or(0)
175                    } else {
176                        0
177                    };
178                    if let Some(pos) = source_line[search_region..].find(needle) {
179                        return Some((search_region + pos + 1, needle.len()));
180                    }
181                }
182            }
183        }
184    }
185    None
186}
187
188// ─── Classification ──────────────────────────────────────────────────────────
189
190/// Classification of a type error message: (slug, conflict, fields, repair).
191pub(crate) type TypeErrorClassification = (
192    &'static str,
193    Option<String>,
194    Vec<(&'static str, String)>,
195    Option<String>,
196);
197
198pub(crate) fn classify_type_error(msg: &str) -> TypeErrorClassification {
199    if let Some(rest) = msg.strip_prefix("Type mismatch:") {
200        let rest = rest.trim();
201        let mut fields = Vec::new();
202        let mut expected = String::new();
203        let mut got = String::new();
204        if let Some((exp, g)) = rest.split_once(", got ") {
205            expected = exp
206                .strip_prefix("expected ")
207                .unwrap_or(exp)
208                .trim()
209                .to_string();
210            got = g.trim().to_string();
211            fields.push(("contract.expected", expected.clone()));
212            fields.push(("observed.actual", got.clone()));
213        }
214        let repair = if !expected.is_empty() && !got.is_empty() {
215            Some(format!("Change the expression to produce {}", expected))
216        } else {
217            None
218        };
219        return ("type-mismatch", Some(msg.to_string()), fields, repair);
220    }
221
222    if msg.starts_with("Unknown identifier") || msg.starts_with("Unknown function") {
223        return (
224            "unknown-ident",
225            None,
226            Vec::new(),
227            Some("Check the spelling or add the missing import".to_string()),
228        );
229    }
230
231    if msg.contains("expects") && msg.contains("argument") {
232        return (
233            "arity-mismatch",
234            Some(msg.to_string()),
235            Vec::new(),
236            Some("Adjust the number of arguments".to_string()),
237        );
238    }
239
240    if msg.contains("effect") && (msg.contains("not declared") || msg.contains("not allowed")) {
241        return (
242            "effect-violation",
243            Some(msg.to_string()),
244            Vec::new(),
245            Some("Add the missing effect to the function's ! [...] declaration".to_string()),
246        );
247    }
248
249    ("type-error", None, Vec::new(), None)
250}
251
252pub(crate) fn classify_finding(msg: &str) -> (&'static str, Option<String>) {
253    if msg.starts_with("File must declare `module") {
254        return (
255            "missing-module",
256            Some("Add `module <Name>` as the first top-level item".to_string()),
257        );
258    }
259    if msg.contains("has effects") && msg.contains("plain verify block") {
260        (
261            "verify-effectful",
262            Some(
263                "Use `verify <fn> trace` with `given` stubs, or test stateful flows via replay"
264                    .to_string(),
265            ),
266        )
267    } else if msg.contains("no verify block") {
268        (
269            "missing-verify",
270            Some("Add a verify block with representative test cases".to_string()),
271        )
272    } else if msg.contains("no description") {
273        (
274            "missing-description",
275            Some("Add a ? \"description\" line after the function signature".to_string()),
276        )
277    } else if msg.contains("non-tail recursion") {
278        (
279            "non-tail-recursion",
280            Some("Convert to accumulator style for tail-call optimization".to_string()),
281        )
282    } else if msg.contains("unused expose") || msg.contains("not used by") {
283        ("unused-expose", None)
284    } else if msg.contains("verify coverage") || msg.contains("verify case") {
285        ("verify-coverage", None)
286    } else if msg.contains("verify law") {
287        ("verify-law", None)
288    } else if msg.contains("List.len") && msg.contains("traverses the entire list") {
289        ("perf-list-len", split_repair(msg))
290    } else if msg.contains("string concatenation") && msg.contains("recursive call") {
291        ("perf-string-concat", split_repair(msg))
292    } else if msg.contains("nested `match") {
293        ("perf-nested-match", split_repair(msg))
294    } else if msg.contains("recomputed every recursive call") {
295        ("perf-loop-invariant", split_repair(msg))
296    } else if msg.contains("computed in both the match condition") {
297        ("cse-match", split_repair(msg))
298    } else if msg.contains("computed") && msg.contains("times in this function") {
299        ("cse-duplicate", split_repair(msg))
300    } else if msg.contains("Independent product branches")
301        && msg.contains("potentially conflicting effects")
302    {
303        ("independence-hazard", split_repair(msg))
304    } else if msg.contains("unused effect") {
305        (
306            "unused-effect",
307            Some("Remove unused effects from the ! [...] declaration".to_string()),
308        )
309    } else if msg.contains("unknown impact symbol") {
310        ("unknown-impact", split_repair(msg))
311    } else if msg.contains("must not call") && msg.contains("on the right side") {
312        ("verify-rhs", None)
313    } else if msg.contains("consider granular") {
314        ("effect-granularity", split_repair(msg))
315    } else if msg.starts_with("Function '") && msg.contains("should use camelCase") {
316        (
317            "bad-fn-name",
318            Some("Rename the function to camelCase; fix call sites manually".to_string()),
319        )
320    } else if msg.starts_with("Type '") && msg.contains("should use PascalCase") {
321        (
322            "bad-type-name",
323            Some("Rename the type to PascalCase; fix constructor references manually".to_string()),
324        )
325    } else if msg.starts_with("Module '") && msg.contains("should use PascalCase") {
326        (
327            "bad-module-name",
328            Some("Rename the module to PascalCase; update depends/file name to match".to_string()),
329        )
330    } else if msg.starts_with("Variant '") && msg.contains("PascalCase") {
331        (
332            "bad-variant-name",
333            Some("Rename the variant to PascalCase".to_string()),
334        )
335    } else if msg.starts_with("Record field '") && msg.contains("camelCase") {
336        (
337            "bad-field-name",
338            Some("Rename the field to camelCase".to_string()),
339        )
340    } else if msg.contains("verify examples") || msg.contains("verify case") {
341        ("verify-coverage", None)
342    } else {
343        ("check", None)
344    }
345}
346
347/// Split a message on ` — ` (em-dash) to extract the repair suggestion.
348pub(crate) fn split_repair(msg: &str) -> Option<String> {
349    msg.split_once(" — ")
350        .or_else(|| msg.split_once(" -- "))
351        .map(|(_, repair)| {
352            let mut r = repair.to_string();
353            if let Some(first) = r.get_mut(0..1) {
354                first.make_ascii_uppercase();
355            }
356            r
357        })
358}
359
360pub(crate) fn extract_fn_name_from_finding(msg: &str) -> Option<String> {
361    if let Some(start) = msg.find('\'')
362        && let Some(end) = msg[start + 1..].find('\'')
363    {
364        return Some(msg[start + 1..start + 1 + end].to_string());
365    }
366    None
367}
368
369#[cfg(test)]
370mod tests {
371    use super::classify_finding;
372
373    #[test]
374    fn classifies_independence_hazard_warning() {
375        let (slug, repair) = classify_finding(
376            "Independent product branches 1 and 2 use potentially conflicting effects [Console.print, Console.error] (shared terminal/output hazard) — independent products may reorder or overlap these effects; keep them sequential or suppress with [[check.suppress]] reason if this independence is intentional",
377        );
378        assert_eq!(slug, "independence-hazard");
379        assert!(repair.is_some());
380    }
381}