Skip to main content

shape_ast/parser/
resilient.rs

1//! Resilient parser for Shape language.
2//!
3//! `parse_program_resilient` always returns a partial program and a list of
4//! typed parse issues. This is intended for editor/LSP scenarios where partial
5//! ASTs are more useful than hard parse failure.
6
7use crate::ast::{Item, Program};
8use crate::parser::{Rule, ShapeParser, parse_item};
9use pest::Parser;
10use pest::error::InputLocation;
11
12/// A partially parsed program — always produced, never fails.
13#[derive(Debug, Clone)]
14pub struct PartialProgram {
15    /// Successfully parsed top-level items.
16    pub items: Vec<Item>,
17    /// Module-level doc comment declared at the start of the file.
18    pub doc_comment: Option<crate::ast::DocComment>,
19    /// Parse issues collected during resilient parsing.
20    pub errors: Vec<ParseError>,
21}
22
23impl PartialProgram {
24    /// Convert to a standard Program (dropping parse issue info).
25    pub fn into_program(self) -> Program {
26        let mut program = Program {
27            items: self.items,
28            docs: crate::ast::ProgramDocs::default(),
29        };
30        program.docs =
31            crate::parser::docs::build_program_docs(&program, self.doc_comment.as_ref());
32        program
33    }
34
35    /// Whether the parse was completely successful (no issues).
36    pub fn is_complete(&self) -> bool {
37        self.errors.is_empty()
38    }
39
40    /// True when every recorded issue is a grammar-level failure.
41    pub fn has_only_grammar_failures(&self) -> bool {
42        !self.errors.is_empty()
43            && self
44                .errors
45                .iter()
46                .all(|e| matches!(e.kind, ParseErrorKind::GrammarFailure))
47    }
48}
49
/// Category of an issue reported by the resilient parser.
///
/// The declaration order matters: the derived `Ord` is used as a sort
/// tie-breaker for issues that share a span.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum ParseErrorKind {
    /// The grammar matched an `item_recovery` node in place of a real item.
    RecoverySyntax,
    /// An `item` node was matched but could not be converted into an AST item.
    ItemConversion,
    /// The grammar rejected the input as a whole.
    GrammarFailure,
    /// A `from <module> ...` line whose third token is neither `use` nor `in`.
    MalformedFromUse,
    /// A `match` whose braces contain nothing but whitespace and comments.
    EmptyMatch,
}
59
60/// A parse issue with span information.
61#[derive(Debug, Clone)]
62pub struct ParseError {
63    pub kind: ParseErrorKind,
64    pub message: String,
65    pub span: (usize, usize),
66}
67
68/// Parse a Shape program resiliently. Always succeeds.
69///
70/// - Uses the normal parser and collects `item_recovery` nodes as syntax issues.
71/// - Records AST conversion failures per item.
72/// - If grammar-level parsing fails, records a grammar failure issue.
73/// - Runs targeted source-level diagnostics (malformed `from ... use`, empty match).
74pub fn parse_program_resilient(source: &str) -> PartialProgram {
75    let mut items = Vec::new();
76    let mut doc_comment = None;
77    let mut errors = Vec::new();
78
79    match ShapeParser::parse(Rule::program, source) {
80        Ok(pairs) => collect_pairs(pairs, 0, &mut items, &mut doc_comment, &mut errors),
81        Err(pest_err) => {
82            errors.push(parse_error_from_pest(&pest_err, source));
83            recover_items_before_grammar_failure(source, &pest_err, &mut items, &mut errors);
84        }
85    }
86
87    // Targeted parse diagnostics (single-source resilient pipeline).
88    errors.extend(detect_malformed_from_use(source));
89    errors.extend(detect_empty_match(source));
90
91    dedup_and_sort_errors(&mut errors);
92
93    PartialProgram {
94        items,
95        doc_comment,
96        errors,
97    }
98}
99
100fn collect_pairs(
101    pairs: pest::iterators::Pairs<Rule>,
102    base_offset: usize,
103    items: &mut Vec<Item>,
104    doc_comment: &mut Option<crate::ast::DocComment>,
105    errors: &mut Vec<ParseError>,
106) {
107    for pair in pairs {
108        if pair.as_rule() != Rule::program {
109            continue;
110        }
111
112        for inner in pair.into_inner() {
113            match inner.as_rule() {
114                Rule::program_doc_comment => {
115                    *doc_comment = Some(crate::parser::docs::parse_doc_comment(inner));
116                }
117                Rule::item => match parse_item(inner.clone()) {
118                    Ok(item) => items.push(item),
119                    Err(e) => {
120                        let span = inner.as_span();
121                        errors.push(ParseError {
122                            kind: ParseErrorKind::ItemConversion,
123                            message: format!("Failed to parse item: {}", e),
124                            span: (base_offset + span.start(), base_offset + span.end()),
125                        });
126                    }
127                },
128                Rule::item_recovery => {
129                    let span = inner.as_span();
130                    let text = inner.as_str().trim();
131                    let preview = if text.len() > 40 {
132                        format!("{}...", &text[..40])
133                    } else {
134                        text.to_string()
135                    };
136                    errors.push(ParseError {
137                        kind: ParseErrorKind::RecoverySyntax,
138                        message: format!("Syntax error near: {}", preview),
139                        span: (base_offset + span.start(), base_offset + span.end()),
140                    });
141                }
142                Rule::EOI => {}
143                _ => {}
144            }
145        }
146    }
147}
148
149fn recover_items_before_grammar_failure(
150    source: &str,
151    err: &pest::error::Error<Rule>,
152    items: &mut Vec<Item>,
153    errors: &mut Vec<ParseError>,
154) {
155    let cutoff = match err.location {
156        InputLocation::Pos(pos) => pos.min(source.len()),
157        InputLocation::Span((start, _)) => start.min(source.len()),
158    };
159
160    if cutoff == 0 {
161        return;
162    }
163
164    for candidate in prefix_cutoffs(source, cutoff) {
165        if candidate == 0 {
166            continue;
167        }
168        let prefix = &source[..candidate];
169        if let Ok(pairs) = ShapeParser::parse(Rule::program, prefix) {
170            let mut doc_comment = None;
171            collect_pairs(pairs, 0, items, &mut doc_comment, errors);
172            return;
173        }
174    }
175}
176
/// List candidate prefix lengths of `source`, longest first.
///
/// The first candidate is `cutoff` clamped to the source length. Each
/// following candidate is the byte index of the newline preceding the
/// previous one, so that prefix ends just before the newline. Zero is never
/// produced, and at most 64 candidates are returned.
fn prefix_cutoffs(source: &str, cutoff: usize) -> Vec<usize> {
    const MAX_CANDIDATES: usize = 64;

    let first = Some(cutoff.min(source.len())).filter(|&end| end > 0);
    std::iter::successors(first, |&end| {
        source[..end].rfind('\n').filter(|&newline| newline > 0)
    })
    .take(MAX_CANDIDATES)
    .collect()
}
194
195fn parse_error_from_pest(err: &pest::error::Error<Rule>, source: &str) -> ParseError {
196    let (start, end) = match err.location {
197        InputLocation::Pos(pos) => {
198            let s = pos.min(source.len());
199            (s, (s + 1).min(source.len()))
200        }
201        InputLocation::Span((start, end)) => {
202            let s = start.min(source.len());
203            let e = end.min(source.len());
204            if e > s {
205                (s, e)
206            } else {
207                (s, (s + 1).min(source.len()))
208            }
209        }
210    };
211
212    ParseError {
213        kind: ParseErrorKind::GrammarFailure,
214        message: format!("Parse error: {}", err),
215        span: (start, end),
216    }
217}
218
219fn dedup_and_sort_errors(errors: &mut Vec<ParseError>) {
220    errors.sort_by_key(|e| (e.span.0, e.span.1, e.kind));
221    errors.dedup_by(|a, b| a.kind == b.kind && a.span == b.span && a.message == b.message);
222}
223
224/// Best-effort targeted recovery for malformed `from <module> use { ... }` lines.
225///
226/// When `use` is misspelled (e.g. `duse`), grammar-level errors can point to
227/// the leading `from` token. This helper reports the actual offending token.
228fn detect_malformed_from_use(source: &str) -> Vec<ParseError> {
229    let mut out = Vec::new();
230    let mut line_base = 0usize;
231
232    for line in source.lines() {
233        let trimmed = line.trim_start();
234        let indent = line.len().saturating_sub(trimmed.len());
235
236        if !trimmed.starts_with("from ") {
237            line_base += line.len() + 1;
238            continue;
239        }
240
241        let mut parts = trimmed.split_whitespace();
242        let _from = parts.next();
243        let _path = parts.next();
244        let keyword = parts.next();
245
246        let Some(found) = keyword else {
247            line_base += line.len() + 1;
248            continue;
249        };
250
251        // `from ... in ...` is query syntax, not import syntax.
252        if found == "use" || found == "in" {
253            line_base += line.len() + 1;
254            continue;
255        }
256
257        if let Some(col) = trimmed.find(found) {
258            let start = line_base + indent + col;
259            let end = start + found.len();
260            out.push(ParseError {
261                kind: ParseErrorKind::MalformedFromUse,
262                message: format!(
263                    "expected keyword 'use' after module path, found '{}'",
264                    found
265                ),
266                span: (start, end),
267            });
268        }
269
270        line_base += line.len() + 1;
271    }
272
273    out
274}
275
276/// Detect empty match expressions:
277///
278/// ```text
279/// match value {
280/// }
281/// ```
282fn detect_empty_match(source: &str) -> Vec<ParseError> {
283    let mut out = Vec::new();
284    let mut search_from = 0usize;
285
286    while let Some(rel_match) = source[search_from..].find("match") {
287        let match_start = search_from + rel_match;
288
289        // Ensure token boundary for `match`.
290        let prev_ok = match_start == 0
291            || !source[..match_start]
292                .chars()
293                .next_back()
294                .is_some_and(|c| c.is_alphanumeric() || c == '_');
295        if !prev_ok {
296            search_from = match_start + "match".len();
297            continue;
298        }
299
300        let after_match = &source[match_start + "match".len()..];
301        let Some(open_rel) = after_match.find('{') else {
302            search_from = match_start + "match".len();
303            continue;
304        };
305        let open = match_start + "match".len() + open_rel;
306
307        let Some(close_rel) = source[open + 1..].find('}') else {
308            search_from = open + 1;
309            continue;
310        };
311        let close = open + 1 + close_rel;
312
313        let between = &source[open + 1..close];
314        let non_comment_content = between
315            .lines()
316            .map(|line| line.split_once("//").map(|(head, _)| head).unwrap_or(line))
317            .collect::<String>();
318
319        if non_comment_content.trim().is_empty() {
320            out.push(ParseError {
321                kind: ParseErrorKind::EmptyMatch,
322                message: "match expression requires at least one arm".to_string(),
323                span: (open, close + 1),
324            });
325        }
326
327        search_from = close + 1;
328    }
329
330    out
331}
332
#[cfg(test)]
mod tests {
    use super::*;

    /// A fully valid program yields all items and no issues.
    #[test]
    fn test_resilient_parse_valid_program() {
        let source = r#"
            let x = 10;
            let y = 20;
        "#;
        let result = parse_program_resilient(source);
        assert!(
            result.errors.is_empty(),
            "Expected no errors: {:?}",
            result.errors
        );
        assert_eq!(result.items.len(), 2);
        assert!(result.is_complete());
    }

    /// Garbage between two valid statements is reported; either some items
    /// survive or every issue is a grammar failure.
    #[test]
    fn test_resilient_parse_with_error_between_items() {
        let source = r#"let x = 10;
@@@ broken stuff here
let y = 20;"#;
        let result = parse_program_resilient(source);
        assert!(!result.errors.is_empty(), "Expected some errors");
        assert!(
            !result.items.is_empty() || result.has_only_grammar_failures(),
            "Expected partial items or explicit grammar failures, got: {:?}",
            result.errors
        );
    }

    /// A broken function definition must not prevent reporting, and should
    /// leave either recovered items or grammar failures only.
    #[test]
    fn test_resilient_parse_recovers_after_bad_function() {
        let source = r#"
function good() {
    return 1;
}

function bad( {
    missing params
}

let x = 42;
"#;
        let result = parse_program_resilient(source);
        assert!(!result.errors.is_empty(), "Expected parse issues");
        assert!(
            !result.items.is_empty() || result.has_only_grammar_failures(),
            "Expected partial items or grammar-failure issues, got {} items and errors: {:?}",
            result.items.len(),
            result.errors
        );
    }

    /// Empty input is a clean, empty program.
    #[test]
    fn test_resilient_parse_empty_source() {
        let result = parse_program_resilient("");
        assert!(result.items.is_empty());
        assert!(result.errors.is_empty());
    }

    /// Input with nothing parseable still returns, with issues recorded.
    #[test]
    fn test_resilient_parse_only_errors() {
        let source = "@@@ !!! ??? garbage";
        let result = parse_program_resilient(source);
        assert!(
            !result.errors.is_empty(),
            "Expected errors for garbage input"
        );
    }

    /// Converting to a plain `Program` keeps the recovered items.
    #[test]
    fn test_partial_program_into_program() {
        let source = "let x = 10;";
        let result = parse_program_resilient(source);
        let program = result.into_program();
        assert_eq!(program.items.len(), 1);
    }

    /// The targeted import diagnostic must point at the misspelled keyword,
    /// not at the leading `from`.
    #[test]
    fn test_reports_misspelled_from_use_keyword_with_token_span() {
        let source = "from std::core::snapshot duse { Snapshot }\nlet x = 1;\n";
        let result = parse_program_resilient(source);

        let specific = result
            .errors
            .iter()
            .find(|e| e.kind == ParseErrorKind::MalformedFromUse)
            .expect("expected targeted malformed import diagnostic");

        let bad = &source[specific.span.0..specific.span.1];
        assert_eq!(bad, "duse");
    }

    /// An empty match further down the file must not surface as a confusing
    /// error about the (valid) `from` import at the top.
    #[test]
    fn test_empty_match_does_not_emit_misleading_from_identifier_error() {
        let source = r#"
from std::core::snapshot use { Snapshot }

let x = {x: 1}
let y = | x | 10*(x.x*2)
print(f"this is {y(x)}")

x.y = 1
let i = 10D

let c = "d"

fn afunc(c) {
  print("func called with " + c)
  match c {

  }
  return c
}

print(afunc(x))
"#;

        let result = parse_program_resilient(source);
        assert!(
            !result
                .errors
                .iter()
                .any(|e| e.message.contains("found identifier `from`")),
            "resilient parser produced misleading import-token error: {:?}",
            result.errors
        );
    }

    /// A typed match following a commented-out line still parses as part of
    /// its enclosing function.
    #[test]
    fn test_resilient_parse_keeps_typed_match_after_commented_line() {
        let source = r#"
from std::core::snapshot use { Snapshot }

fn afunc(c) {
  //print("func called with " + c)
  let result = match c {
    c: int => c + 1
    _ => 1
  }
  return c
  return "hi"
}
"#;

        let result = parse_program_resilient(source);
        assert!(
            result
                .items
                .iter()
                .any(|item| matches!(item, crate::ast::Item::Function(_, _))),
            "expected function item to parse, got: {:?}",
            result.items
        );
    }

    /// The empty-match diagnostic covers exactly the braces of the match body.
    #[test]
    fn test_detect_empty_match_reports_precise_span() {
        let source = "fn f(x) {\n  match x {\n\n  }\n}\n";
        let errors = detect_empty_match(source);
        let issue = errors
            .iter()
            .find(|e| e.kind == ParseErrorKind::EmptyMatch)
            .unwrap_or_else(|| panic!("expected empty match issue, got: {:?}", errors));

        // The span must run from the opening brace through the closing brace.
        assert_eq!(&source[issue.span.0..issue.span.1], "{\n\n  }");
    }
}