// shape_ast/parser/resilient.rs

//! Resilient parser for Shape language.
//!
//! `parse_program_resilient` always returns a partial program and a list of
//! typed parse issues. This is intended for editor/LSP scenarios where partial
//! ASTs are more useful than hard parse failure.
6
7use crate::ast::{Item, Program};
8use crate::parser::{Rule, ShapeParser, parse_item};
9use pest::Parser;
10use pest::error::InputLocation;
11
12/// A partially parsed program — always produced, never fails.
13#[derive(Debug, Clone)]
14pub struct PartialProgram {
15    /// Successfully parsed top-level items.
16    pub items: Vec<Item>,
17    /// Module-level doc comment declared at the start of the file.
18    pub doc_comment: Option<crate::ast::DocComment>,
19    /// Parse issues collected during resilient parsing.
20    pub errors: Vec<ParseError>,
21}
22
23impl PartialProgram {
24    /// Convert to a standard Program (dropping parse issue info).
25    pub fn into_program(self) -> Program {
26        let mut program = Program {
27            items: self.items,
28            docs: crate::ast::ProgramDocs::default(),
29        };
30        program.docs = crate::parser::docs::build_program_docs(&program, self.doc_comment.as_ref());
31        program
32    }
33
34    /// Whether the parse was completely successful (no issues).
35    pub fn is_complete(&self) -> bool {
36        self.errors.is_empty()
37    }
38
39    /// True when every recorded issue is a grammar-level failure.
40    pub fn has_only_grammar_failures(&self) -> bool {
41        !self.errors.is_empty()
42            && self
43                .errors
44                .iter()
45                .all(|e| matches!(e.kind, ParseErrorKind::GrammarFailure))
46    }
47}
48
/// Kind of resilient parse issue.
///
/// The derived ordering follows declaration order and serves as the final
/// tie-breaker when issues covering the same span are sorted.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum ParseErrorKind {
    /// Source text captured by the grammar's `item_recovery` rule.
    RecoverySyntax,
    /// An item matched the grammar but could not be converted into an AST item.
    ItemConversion,
    /// The grammar rejected the source as a whole.
    GrammarFailure,
    /// A `from <module> ...` line whose keyword after the module path is
    /// neither `use` nor `in`.
    MalformedFromUse,
    /// A `match` expression whose braces contain no arms.
    EmptyMatch,
}
58
59/// A parse issue with span information.
60#[derive(Debug, Clone)]
61pub struct ParseError {
62    pub kind: ParseErrorKind,
63    pub message: String,
64    pub span: (usize, usize),
65}
66
67/// Parse a Shape program resiliently. Always succeeds.
68///
69/// - Uses the normal parser and collects `item_recovery` nodes as syntax issues.
70/// - Records AST conversion failures per item.
71/// - If grammar-level parsing fails, records a grammar failure issue.
72/// - Runs targeted source-level diagnostics (malformed `from ... use`, empty match).
73pub fn parse_program_resilient(source: &str) -> PartialProgram {
74    let mut items = Vec::new();
75    let mut doc_comment = None;
76    let mut errors = Vec::new();
77
78    match ShapeParser::parse(Rule::program, source) {
79        Ok(pairs) => collect_pairs(pairs, 0, &mut items, &mut doc_comment, &mut errors),
80        Err(pest_err) => {
81            errors.push(parse_error_from_pest(&pest_err, source));
82            recover_items_before_grammar_failure(source, &pest_err, &mut items, &mut errors);
83        }
84    }
85
86    // Targeted parse diagnostics (single-source resilient pipeline).
87    errors.extend(detect_malformed_from_use(source));
88    errors.extend(detect_empty_match(source));
89
90    dedup_and_sort_errors(&mut errors);
91
92    PartialProgram {
93        items,
94        doc_comment,
95        errors,
96    }
97}
98
99fn collect_pairs(
100    pairs: pest::iterators::Pairs<Rule>,
101    base_offset: usize,
102    items: &mut Vec<Item>,
103    doc_comment: &mut Option<crate::ast::DocComment>,
104    errors: &mut Vec<ParseError>,
105) {
106    for pair in pairs {
107        if pair.as_rule() != Rule::program {
108            continue;
109        }
110
111        for inner in pair.into_inner() {
112            match inner.as_rule() {
113                Rule::program_doc_comment => {
114                    *doc_comment = Some(crate::parser::docs::parse_doc_comment(inner));
115                }
116                Rule::item => match parse_item(inner.clone()) {
117                    Ok(item) => items.push(item),
118                    Err(e) => {
119                        let span = inner.as_span();
120                        errors.push(ParseError {
121                            kind: ParseErrorKind::ItemConversion,
122                            message: format!("Failed to parse item: {}", e),
123                            span: (base_offset + span.start(), base_offset + span.end()),
124                        });
125                    }
126                },
127                Rule::item_recovery => {
128                    let span = inner.as_span();
129                    let text = inner.as_str().trim();
130                    let preview = if text.len() > 40 {
131                        format!("{}...", &text[..40])
132                    } else {
133                        text.to_string()
134                    };
135                    errors.push(ParseError {
136                        kind: ParseErrorKind::RecoverySyntax,
137                        message: format!("Syntax error near: {}", preview),
138                        span: (base_offset + span.start(), base_offset + span.end()),
139                    });
140                }
141                Rule::EOI => {}
142                _ => {}
143            }
144        }
145    }
146}
147
148fn recover_items_before_grammar_failure(
149    source: &str,
150    err: &pest::error::Error<Rule>,
151    items: &mut Vec<Item>,
152    errors: &mut Vec<ParseError>,
153) {
154    let cutoff = match err.location {
155        InputLocation::Pos(pos) => pos.min(source.len()),
156        InputLocation::Span((start, _)) => start.min(source.len()),
157    };
158
159    if cutoff == 0 {
160        return;
161    }
162
163    for candidate in prefix_cutoffs(source, cutoff) {
164        if candidate == 0 {
165            continue;
166        }
167        let prefix = &source[..candidate];
168        if let Ok(pairs) = ShapeParser::parse(Rule::program, prefix) {
169            let mut doc_comment = None;
170            collect_pairs(pairs, 0, items, &mut doc_comment, errors);
171            return;
172        }
173    }
174}
175
/// Candidate prefix lengths (in bytes) to retry parsing with, longest first.
///
/// The first candidate is `cutoff`, clamped to the source length and rounded
/// down to a character boundary; each following candidate is the position of
/// the previous line break before it. Zero is never returned, and at most 64
/// candidates are produced so recovery work stays bounded.
fn prefix_cutoffs(source: &str, cutoff: usize) -> Vec<usize> {
    const MAX_CANDIDATES: usize = 64;

    // Slicing inside a multi-byte character would panic, so back up to the
    // nearest boundary first. Offset 0 is always a boundary, so this stops.
    let mut start = cutoff.min(source.len());
    while !source.is_char_boundary(start) {
        start -= 1;
    }

    std::iter::successors((start > 0).then_some(start), |&current| {
        // A newline at offset 0 would yield an empty prefix; stop there.
        source[..current].rfind('\n').filter(|&newline| newline > 0)
    })
    .take(MAX_CANDIDATES)
    .collect()
}
193
194fn parse_error_from_pest(err: &pest::error::Error<Rule>, source: &str) -> ParseError {
195    let (start, end) = match err.location {
196        InputLocation::Pos(pos) => {
197            let s = pos.min(source.len());
198            (s, (s + 1).min(source.len()))
199        }
200        InputLocation::Span((start, end)) => {
201            let s = start.min(source.len());
202            let e = end.min(source.len());
203            if e > s {
204                (s, e)
205            } else {
206                (s, (s + 1).min(source.len()))
207            }
208        }
209    };
210
211    ParseError {
212        kind: ParseErrorKind::GrammarFailure,
213        message: format!("Parse error: {}", err),
214        span: (start, end),
215    }
216}
217
218fn dedup_and_sort_errors(errors: &mut Vec<ParseError>) {
219    errors.sort_by_key(|e| (e.span.0, e.span.1, e.kind));
220    errors.dedup_by(|a, b| a.kind == b.kind && a.span == b.span && a.message == b.message);
221}
222
223/// Best-effort targeted recovery for malformed `from <module> use { ... }` lines.
224///
225/// When `use` is misspelled (e.g. `duse`), grammar-level errors can point to
226/// the leading `from` token. This helper reports the actual offending token.
227fn detect_malformed_from_use(source: &str) -> Vec<ParseError> {
228    let mut out = Vec::new();
229    let mut line_base = 0usize;
230
231    for line in source.lines() {
232        let trimmed = line.trim_start();
233        let indent = line.len().saturating_sub(trimmed.len());
234
235        if !trimmed.starts_with("from ") {
236            line_base += line.len() + 1;
237            continue;
238        }
239
240        let mut parts = trimmed.split_whitespace();
241        let _from = parts.next();
242        let _path = parts.next();
243        let keyword = parts.next();
244
245        let Some(found) = keyword else {
246            line_base += line.len() + 1;
247            continue;
248        };
249
250        // `from ... in ...` is query syntax, not import syntax.
251        if found == "use" || found == "in" {
252            line_base += line.len() + 1;
253            continue;
254        }
255
256        if let Some(col) = trimmed.find(found) {
257            let start = line_base + indent + col;
258            let end = start + found.len();
259            out.push(ParseError {
260                kind: ParseErrorKind::MalformedFromUse,
261                message: format!(
262                    "expected keyword 'use' after module path, found '{}'",
263                    found
264                ),
265                span: (start, end),
266            });
267        }
268
269        line_base += line.len() + 1;
270    }
271
272    out
273}
274
275/// Detect empty match expressions:
276///
277/// ```text
278/// match value {
279/// }
280/// ```
281fn detect_empty_match(source: &str) -> Vec<ParseError> {
282    let mut out = Vec::new();
283    let mut search_from = 0usize;
284
285    while let Some(rel_match) = source[search_from..].find("match") {
286        let match_start = search_from + rel_match;
287
288        // Ensure token boundary for `match`.
289        let prev_ok = match_start == 0
290            || !source[..match_start]
291                .chars()
292                .next_back()
293                .is_some_and(|c| c.is_alphanumeric() || c == '_');
294        if !prev_ok {
295            search_from = match_start + "match".len();
296            continue;
297        }
298
299        let after_match = &source[match_start + "match".len()..];
300        let Some(open_rel) = after_match.find('{') else {
301            search_from = match_start + "match".len();
302            continue;
303        };
304        let open = match_start + "match".len() + open_rel;
305
306        let Some(close_rel) = source[open + 1..].find('}') else {
307            search_from = open + 1;
308            continue;
309        };
310        let close = open + 1 + close_rel;
311
312        let between = &source[open + 1..close];
313        let non_comment_content = between
314            .lines()
315            .map(|line| line.split_once("//").map(|(head, _)| head).unwrap_or(line))
316            .collect::<String>();
317
318        if non_comment_content.trim().is_empty() {
319            out.push(ParseError {
320                kind: ParseErrorKind::EmptyMatch,
321                message: "match expression requires at least one arm".to_string(),
322                span: (open, close + 1),
323            });
324        }
325
326        search_from = close + 1;
327    }
328
329    out
330}
331
#[cfg(test)]
mod tests {
    use super::*;

    /// A fully valid program yields every item and no issues.
    #[test]
    fn test_resilient_parse_valid_program() {
        let source = r#"
            let x = 10;
            let y = 20;
        "#;
        let result = parse_program_resilient(source);
        assert!(
            result.errors.is_empty(),
            "Expected no errors: {:?}",
            result.errors
        );
        assert_eq!(result.items.len(), 2);
        assert!(result.is_complete());
    }

    /// Garbage between two valid items is reported; either some items survive
    /// or the failure is reported purely as grammar failures.
    #[test]
    fn test_resilient_parse_with_error_between_items() {
        let source = r#"let x = 10;
@@@ broken stuff here
let y = 20;"#;
        let result = parse_program_resilient(source);
        assert!(!result.errors.is_empty(), "Expected some errors");
        assert!(
            !result.items.is_empty() || result.has_only_grammar_failures(),
            "Expected partial items or explicit grammar failures, got: {:?}",
            result.errors
        );
    }

    /// A malformed function in the middle of the file does not prevent issues
    /// from being reported alongside whatever could be recovered.
    #[test]
    fn test_resilient_parse_recovers_after_bad_function() {
        let source = r#"
function good() {
    return 1;
}

function bad( {
    missing params
}

let x = 42;
"#;
        let result = parse_program_resilient(source);
        assert!(!result.errors.is_empty(), "Expected parse issues");
        assert!(
            !result.items.is_empty() || result.has_only_grammar_failures(),
            "Expected partial items or grammar-failure issues, got {} items and errors: {:?}",
            result.items.len(),
            result.errors
        );
    }

    /// Empty input is a valid, empty program.
    #[test]
    fn test_resilient_parse_empty_source() {
        let result = parse_program_resilient("");
        assert!(result.items.is_empty());
        assert!(result.errors.is_empty());
    }

    /// Input with nothing parseable still returns, with issues attached.
    #[test]
    fn test_resilient_parse_only_errors() {
        let source = "@@@ !!! ??? garbage";
        let result = parse_program_resilient(source);
        assert!(
            !result.errors.is_empty(),
            "Expected errors for garbage input"
        );
    }

    /// Converting to a `Program` keeps the parsed items.
    #[test]
    fn test_partial_program_into_program() {
        let source = "let x = 10;";
        let result = parse_program_resilient(source);
        let program = result.into_program();
        assert_eq!(program.items.len(), 1);
    }

    /// A misspelled `use` is reported with a span covering only that token.
    #[test]
    fn test_reports_misspelled_from_use_keyword_with_token_span() {
        let source = "from std::core::snapshot duse { Snapshot }\nlet x = 1;\n";
        let result = parse_program_resilient(source);

        let specific = result
            .errors
            .iter()
            .find(|e| e.kind == ParseErrorKind::MalformedFromUse)
            .expect("expected targeted malformed import diagnostic");

        let bad = &source[specific.span.0..specific.span.1];
        assert_eq!(bad, "duse");
    }

    /// An empty match later in the file must not surface as an error that
    /// blames the valid `from` import at the top.
    #[test]
    fn test_empty_match_does_not_emit_misleading_from_identifier_error() {
        let source = r#"
from std::core::snapshot use { Snapshot }

let x = {x: 1}
let y = | x | 10*(x.x*2)
print(f"this is {y(x)}")

x.y = 1
let i = 10D

let c = "d"

fn afunc(c) {
  print("func called with " + c)
  match c {

  }
  return c
}

print(afunc(x))
"#;

        let result = parse_program_resilient(source);
        assert!(
            !result
                .errors
                .iter()
                .any(|e| e.message.contains("found identifier `from`")),
            "resilient parser produced misleading import-token error: {:?}",
            result.errors
        );
    }

    /// A typed match following a commented-out line still parses as part of
    /// the enclosing function.
    #[test]
    fn test_resilient_parse_keeps_typed_match_after_commented_line() {
        let source = r#"
from std::core::snapshot use { Snapshot }

fn afunc(c) {
  //print("func called with " + c)
  let result = match c {
    c: int => c + 1
    _ => 1
  }
  return c
  return "hi"
}
"#;

        let result = parse_program_resilient(source);
        assert!(
            result
                .items
                .iter()
                .any(|item| matches!(item, crate::ast::Item::Function(_, _))),
            "expected function item to parse, got: {:?}",
            result.items
        );
    }

    /// The empty-match issue spans exactly the braces of the match body.
    #[test]
    fn test_detect_empty_match_reports_precise_span() {
        let source = "fn f(x) {\n  match x {\n\n  }\n}\n";
        let errors = detect_empty_match(source);
        let issue = errors
            .iter()
            .find(|e| e.kind == ParseErrorKind::EmptyMatch)
            .unwrap_or_else(|| panic!("expected empty match issue, got: {:?}", errors));
        assert_eq!(&source[issue.span.0..issue.span.1], "{\n\n  }");
    }
}