Skip to main content

shape_ast/parser/
resilient.rs

1//! Resilient parser for Shape language.
2//!
3//! `parse_program_resilient` always returns a partial program and a list of
4//! typed parse issues. This is intended for editor/LSP scenarios where partial
5//! ASTs are more useful than hard parse failure.
6
7use crate::ast::{Item, Program};
8use crate::parser::{Rule, ShapeParser, parse_item};
9use pest::Parser;
10use pest::error::InputLocation;
11
/// A partially parsed program — always produced, never fails.
///
/// This is the return type of `parse_program_resilient`: it carries whatever
/// top-level items could be converted into AST nodes together with every
/// issue that was recorded while doing so.
#[derive(Debug, Clone)]
pub struct PartialProgram {
    /// Successfully parsed top-level items.
    pub items: Vec<Item>,
    /// Parse issues collected during resilient parsing.
    /// Empty when the source parsed without any recorded issue.
    pub errors: Vec<ParseError>,
}
20
21impl PartialProgram {
22    /// Convert to a standard Program (dropping parse issue info).
23    pub fn into_program(self) -> Program {
24        Program { items: self.items }
25    }
26
27    /// Whether the parse was completely successful (no issues).
28    pub fn is_complete(&self) -> bool {
29        self.errors.is_empty()
30    }
31
32    /// True when every recorded issue is a grammar-level failure.
33    pub fn has_only_grammar_failures(&self) -> bool {
34        !self.errors.is_empty()
35            && self
36                .errors
37                .iter()
38                .all(|e| matches!(e.kind, ParseErrorKind::GrammarFailure))
39    }
40}
41
/// Kind of resilient parse issue.
///
/// `Ord` is derived, so the declaration order of the variants is significant:
/// it is used as the tie-breaker when issues with the same span are sorted.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum ParseErrorKind {
    /// The grammar matched an `item_recovery` node in place of a valid item.
    RecoverySyntax,
    /// An `item` node matched the grammar but converting it to an AST item failed.
    ItemConversion,
    /// The grammar-level parse of the whole program failed.
    GrammarFailure,
    /// A `from <module> ...` line whose third token is neither `use` nor `in`.
    MalformedFromUse,
    /// A `match` whose braces contain nothing but whitespace and `//` comments.
    EmptyMatch,
}
51
/// A parse issue with span information.
#[derive(Debug, Clone)]
pub struct ParseError {
    /// Category of the issue.
    pub kind: ParseErrorKind,
    /// Human-readable description of the issue.
    pub message: String,
    /// `(start, end)` byte offsets into the source text; `end` is exclusive.
    pub span: (usize, usize),
}
59
60/// Parse a Shape program resiliently. Always succeeds.
61///
62/// - Uses the normal parser and collects `item_recovery` nodes as syntax issues.
63/// - Records AST conversion failures per item.
64/// - If grammar-level parsing fails, records a grammar failure issue.
65/// - Runs targeted source-level diagnostics (malformed `from ... use`, empty match).
66pub fn parse_program_resilient(source: &str) -> PartialProgram {
67    let mut items = Vec::new();
68    let mut errors = Vec::new();
69
70    match ShapeParser::parse(Rule::program, source) {
71        Ok(pairs) => collect_pairs(pairs, 0, &mut items, &mut errors),
72        Err(pest_err) => {
73            errors.push(parse_error_from_pest(&pest_err, source));
74            recover_items_before_grammar_failure(source, &pest_err, &mut items, &mut errors);
75        }
76    }
77
78    // Targeted parse diagnostics (single-source resilient pipeline).
79    errors.extend(detect_malformed_from_use(source));
80    errors.extend(detect_empty_match(source));
81
82    dedup_and_sort_errors(&mut errors);
83
84    PartialProgram { items, errors }
85}
86
87fn collect_pairs(
88    pairs: pest::iterators::Pairs<Rule>,
89    base_offset: usize,
90    items: &mut Vec<Item>,
91    errors: &mut Vec<ParseError>,
92) {
93    for pair in pairs {
94        if pair.as_rule() != Rule::program {
95            continue;
96        }
97
98        for inner in pair.into_inner() {
99            match inner.as_rule() {
100                Rule::item => match parse_item(inner.clone()) {
101                    Ok(item) => items.push(item),
102                    Err(e) => {
103                        let span = inner.as_span();
104                        errors.push(ParseError {
105                            kind: ParseErrorKind::ItemConversion,
106                            message: format!("Failed to parse item: {}", e),
107                            span: (base_offset + span.start(), base_offset + span.end()),
108                        });
109                    }
110                },
111                Rule::item_recovery => {
112                    let span = inner.as_span();
113                    let text = inner.as_str().trim();
114                    let preview = if text.len() > 40 {
115                        format!("{}...", &text[..40])
116                    } else {
117                        text.to_string()
118                    };
119                    errors.push(ParseError {
120                        kind: ParseErrorKind::RecoverySyntax,
121                        message: format!("Syntax error near: {}", preview),
122                        span: (base_offset + span.start(), base_offset + span.end()),
123                    });
124                }
125                Rule::EOI => {}
126                _ => {}
127            }
128        }
129    }
130}
131
132fn recover_items_before_grammar_failure(
133    source: &str,
134    err: &pest::error::Error<Rule>,
135    items: &mut Vec<Item>,
136    errors: &mut Vec<ParseError>,
137) {
138    let cutoff = match err.location {
139        InputLocation::Pos(pos) => pos.min(source.len()),
140        InputLocation::Span((start, _)) => start.min(source.len()),
141    };
142
143    if cutoff == 0 {
144        return;
145    }
146
147    for candidate in prefix_cutoffs(source, cutoff) {
148        if candidate == 0 {
149            continue;
150        }
151        let prefix = &source[..candidate];
152        if let Ok(pairs) = ShapeParser::parse(Rule::program, prefix) {
153            collect_pairs(pairs, 0, items, errors);
154            return;
155        }
156    }
157}
158
/// List candidate prefix lengths of `source`, longest first.
///
/// The first candidate is `cutoff` clamped to `source.len()`; each following
/// candidate is the byte index of the last `'\n'` before the previous one.
/// Zero is never returned, and at most 64 candidates are produced.
fn prefix_cutoffs(source: &str, cutoff: usize) -> Vec<usize> {
    const MAX_CANDIDATES: usize = 64;

    let first = Some(cutoff.min(source.len())).filter(|&end| end > 0);

    std::iter::successors(first, |&end| {
        source[..end].rfind('\n').filter(|&newline| newline > 0)
    })
    .take(MAX_CANDIDATES)
    .collect()
}
176
177fn parse_error_from_pest(err: &pest::error::Error<Rule>, source: &str) -> ParseError {
178    let (start, end) = match err.location {
179        InputLocation::Pos(pos) => {
180            let s = pos.min(source.len());
181            (s, (s + 1).min(source.len()))
182        }
183        InputLocation::Span((start, end)) => {
184            let s = start.min(source.len());
185            let e = end.min(source.len());
186            if e > s {
187                (s, e)
188            } else {
189                (s, (s + 1).min(source.len()))
190            }
191        }
192    };
193
194    ParseError {
195        kind: ParseErrorKind::GrammarFailure,
196        message: format!("Parse error: {}", err),
197        span: (start, end),
198    }
199}
200
201fn dedup_and_sort_errors(errors: &mut Vec<ParseError>) {
202    errors.sort_by_key(|e| (e.span.0, e.span.1, e.kind));
203    errors.dedup_by(|a, b| a.kind == b.kind && a.span == b.span && a.message == b.message);
204}
205
206/// Best-effort targeted recovery for malformed `from <module> use { ... }` lines.
207///
208/// When `use` is misspelled (e.g. `duse`), grammar-level errors can point to
209/// the leading `from` token. This helper reports the actual offending token.
210fn detect_malformed_from_use(source: &str) -> Vec<ParseError> {
211    let mut out = Vec::new();
212    let mut line_base = 0usize;
213
214    for line in source.lines() {
215        let trimmed = line.trim_start();
216        let indent = line.len().saturating_sub(trimmed.len());
217
218        if !trimmed.starts_with("from ") {
219            line_base += line.len() + 1;
220            continue;
221        }
222
223        let mut parts = trimmed.split_whitespace();
224        let _from = parts.next();
225        let _path = parts.next();
226        let keyword = parts.next();
227
228        let Some(found) = keyword else {
229            line_base += line.len() + 1;
230            continue;
231        };
232
233        // `from ... in ...` is query syntax, not import syntax.
234        if found == "use" || found == "in" {
235            line_base += line.len() + 1;
236            continue;
237        }
238
239        if let Some(col) = trimmed.find(found) {
240            let start = line_base + indent + col;
241            let end = start + found.len();
242            out.push(ParseError {
243                kind: ParseErrorKind::MalformedFromUse,
244                message: format!(
245                    "expected keyword 'use' after module path, found '{}'",
246                    found
247                ),
248                span: (start, end),
249            });
250        }
251
252        line_base += line.len() + 1;
253    }
254
255    out
256}
257
258/// Detect empty match expressions:
259///
260/// ```text
261/// match value {
262/// }
263/// ```
264fn detect_empty_match(source: &str) -> Vec<ParseError> {
265    let mut out = Vec::new();
266    let mut search_from = 0usize;
267
268    while let Some(rel_match) = source[search_from..].find("match") {
269        let match_start = search_from + rel_match;
270
271        // Ensure token boundary for `match`.
272        let prev_ok = match_start == 0
273            || !source[..match_start]
274                .chars()
275                .next_back()
276                .is_some_and(|c| c.is_alphanumeric() || c == '_');
277        if !prev_ok {
278            search_from = match_start + "match".len();
279            continue;
280        }
281
282        let after_match = &source[match_start + "match".len()..];
283        let Some(open_rel) = after_match.find('{') else {
284            search_from = match_start + "match".len();
285            continue;
286        };
287        let open = match_start + "match".len() + open_rel;
288
289        let Some(close_rel) = source[open + 1..].find('}') else {
290            search_from = open + 1;
291            continue;
292        };
293        let close = open + 1 + close_rel;
294
295        let between = &source[open + 1..close];
296        let non_comment_content = between
297            .lines()
298            .map(|line| line.split_once("//").map(|(head, _)| head).unwrap_or(line))
299            .collect::<String>();
300
301        if non_comment_content.trim().is_empty() {
302            out.push(ParseError {
303                kind: ParseErrorKind::EmptyMatch,
304                message: "match expression requires at least one arm".to_string(),
305                span: (open, close + 1),
306            });
307        }
308
309        search_from = close + 1;
310    }
311
312    out
313}
314
#[cfg(test)]
mod tests {
    use super::*;

    // A fully valid program yields every item and no issues.
    #[test]
    fn test_resilient_parse_valid_program() {
        let source = r#"
            let x = 10;
            let y = 20;
        "#;
        let result = parse_program_resilient(source);
        assert!(
            result.errors.is_empty(),
            "Expected no errors: {:?}",
            result.errors
        );
        assert_eq!(result.items.len(), 2);
        assert!(result.is_complete());
    }

    // Garbage between two valid items must be reported; either some items
    // survive or the issues are all grammar failures.
    #[test]
    fn test_resilient_parse_with_error_between_items() {
        let source = r#"let x = 10;
@@@ broken stuff here
let y = 20;"#;
        let result = parse_program_resilient(source);
        assert!(!result.errors.is_empty(), "Expected some errors");
        assert!(
            !result.items.is_empty() || result.has_only_grammar_failures(),
            "Expected partial items or explicit grammar failures, got: {:?}",
            result.errors
        );
    }

    // A broken function in the middle must not prevent reporting issues and
    // keeping whatever could be parsed.
    #[test]
    fn test_resilient_parse_recovers_after_bad_function() {
        let source = r#"
function good() {
    return 1;
}

function bad( {
    missing params
}

let x = 42;
"#;
        let result = parse_program_resilient(source);
        assert!(!result.errors.is_empty(), "Expected parse issues");
        assert!(
            !result.items.is_empty() || result.has_only_grammar_failures(),
            "Expected partial items or grammar-failure issues, got {} items and errors: {:?}",
            result.items.len(),
            result.errors
        );
    }

    // Empty input is a valid, empty program.
    #[test]
    fn test_resilient_parse_empty_source() {
        let result = parse_program_resilient("");
        assert!(result.items.is_empty());
        assert!(result.errors.is_empty());
    }

    // Input with nothing parseable still returns, with issues attached.
    #[test]
    fn test_resilient_parse_only_errors() {
        let source = "@@@ !!! ??? garbage";
        let result = parse_program_resilient(source);
        assert!(
            !result.errors.is_empty(),
            "Expected errors for garbage input"
        );
    }

    // `into_program` keeps the parsed items.
    #[test]
    fn test_partial_program_into_program() {
        let source = "let x = 10;";
        let result = parse_program_resilient(source);
        let program = result.into_program();
        assert_eq!(program.items.len(), 1);
    }

    // The targeted import diagnostic must point at the misspelled keyword
    // itself, not at the leading `from`.
    #[test]
    fn test_reports_misspelled_from_use_keyword_with_token_span() {
        let source = "from std::core::snapshot duse { Snapshot }\nlet x = 1;\n";
        let result = parse_program_resilient(source);

        let specific = result
            .errors
            .iter()
            .find(|e| e.kind == ParseErrorKind::MalformedFromUse)
            .expect("expected targeted malformed import diagnostic");

        let bad = &source[specific.span.0..specific.span.1];
        assert_eq!(bad, "duse");
    }

    // An empty match further down must not surface as an error about the
    // (valid) `from` import at the top of the file.
    #[test]
    fn test_empty_match_does_not_emit_misleading_from_identifier_error() {
        let source = r#"
from std::core::snapshot use { Snapshot }

let x = {x: 1}
let y = | x | 10*(x.x*2)
print(f"this is {y(x)}")

x.y = 1
let i = 10D

let c = "d"

fn afunc(c) {
  print("func called with " + c)
  match c {

  }
  return c
}

print(afunc(x))
"#;

        let result = parse_program_resilient(source);
        assert!(
            !result
                .errors
                .iter()
                .any(|e| e.message.contains("found identifier `from`")),
            "resilient parser produced misleading import-token error: {:?}",
            result.errors
        );
    }

    // A typed match that follows a commented-out line must still parse as
    // part of its enclosing function.
    #[test]
    fn test_resilient_parse_keeps_typed_match_after_commented_line() {
        let source = r#"
from std::core::snapshot use { Snapshot }

fn afunc(c) {
  //print("func called with " + c)
  let result = match c {
    c: int => c + 1
    _ => 1
  }
  return c
  return "hi"
}
"#;

        let result = parse_program_resilient(source);
        assert!(
            result
                .items
                .iter()
                .any(|item| matches!(item, crate::ast::Item::Function(_, _))),
            "expected function item to parse, got: {:?}",
            result.items
        );
    }

    // The empty-match diagnostic must cover exactly the braces of the match
    // body, from the opening `{` through the closing `}`.
    #[test]
    fn test_detect_empty_match_reports_precise_span() {
        let source = "fn f(x) {\n  match x {\n\n  }\n}\n";
        let errors = detect_empty_match(source);
        let issue = errors
            .iter()
            .find(|e| e.kind == ParseErrorKind::EmptyMatch)
            .unwrap_or_else(|| panic!("expected empty match issue, got: {:?}", errors));
        assert_eq!(&source[issue.span.0..issue.span.1], "{\n\n  }");
    }
}