1use crate::ast::{Item, Program};
8use crate::parser::{Rule, ShapeParser, parse_item};
9use pest::Parser;
10use pest::error::InputLocation;
11
12#[derive(Debug, Clone)]
14pub struct PartialProgram {
15 pub items: Vec<Item>,
17 pub doc_comment: Option<crate::ast::DocComment>,
19 pub errors: Vec<ParseError>,
21}
22
23impl PartialProgram {
24 pub fn into_program(self) -> Program {
26 let mut program = Program {
27 items: self.items,
28 docs: crate::ast::ProgramDocs::default(),
29 };
30 program.docs = crate::parser::docs::build_program_docs(&program, self.doc_comment.as_ref());
31 program
32 }
33
34 pub fn is_complete(&self) -> bool {
36 self.errors.is_empty()
37 }
38
39 pub fn has_only_grammar_failures(&self) -> bool {
41 !self.errors.is_empty()
42 && self
43 .errors
44 .iter()
45 .all(|e| matches!(e.kind, ParseErrorKind::GrammarFailure))
46 }
47}
48
/// Category of a [`ParseError`].
///
/// The declaration order is significant: the derived `Ord` is used as a
/// tie-breaker when errors with identical spans are sorted.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum ParseErrorKind {
    /// The grammar matched an `item_recovery` region instead of a real item.
    RecoverySyntax,
    /// The grammar matched an item, but converting it to an AST node failed.
    ItemConversion,
    /// The grammar rejected the source as a whole.
    GrammarFailure,
    /// A `from <path> ...` line whose third token is not an accepted keyword.
    MalformedFromUse,
    /// A `match` whose braces contain no arms.
    EmptyMatch,
}
58
59#[derive(Debug, Clone)]
61pub struct ParseError {
62 pub kind: ParseErrorKind,
63 pub message: String,
64 pub span: (usize, usize),
65}
66
67pub fn parse_program_resilient(source: &str) -> PartialProgram {
74 let mut items = Vec::new();
75 let mut doc_comment = None;
76 let mut errors = Vec::new();
77
78 match ShapeParser::parse(Rule::program, source) {
79 Ok(pairs) => collect_pairs(pairs, 0, &mut items, &mut doc_comment, &mut errors),
80 Err(pest_err) => {
81 errors.push(parse_error_from_pest(&pest_err, source));
82 recover_items_before_grammar_failure(source, &pest_err, &mut items, &mut errors);
83 }
84 }
85
86 errors.extend(detect_malformed_from_use(source));
88 errors.extend(detect_empty_match(source));
89
90 dedup_and_sort_errors(&mut errors);
91
92 PartialProgram {
93 items,
94 doc_comment,
95 errors,
96 }
97}
98
99fn collect_pairs(
100 pairs: pest::iterators::Pairs<Rule>,
101 base_offset: usize,
102 items: &mut Vec<Item>,
103 doc_comment: &mut Option<crate::ast::DocComment>,
104 errors: &mut Vec<ParseError>,
105) {
106 for pair in pairs {
107 if pair.as_rule() != Rule::program {
108 continue;
109 }
110
111 for inner in pair.into_inner() {
112 match inner.as_rule() {
113 Rule::program_doc_comment => {
114 *doc_comment = Some(crate::parser::docs::parse_doc_comment(inner));
115 }
116 Rule::item => match parse_item(inner.clone()) {
117 Ok(item) => items.push(item),
118 Err(e) => {
119 let span = inner.as_span();
120 errors.push(ParseError {
121 kind: ParseErrorKind::ItemConversion,
122 message: format!("Failed to parse item: {}", e),
123 span: (base_offset + span.start(), base_offset + span.end()),
124 });
125 }
126 },
127 Rule::item_recovery => {
128 let span = inner.as_span();
129 let text = inner.as_str().trim();
130 let preview = if text.len() > 40 {
131 format!("{}...", &text[..40])
132 } else {
133 text.to_string()
134 };
135 errors.push(ParseError {
136 kind: ParseErrorKind::RecoverySyntax,
137 message: format!("Syntax error near: {}", preview),
138 span: (base_offset + span.start(), base_offset + span.end()),
139 });
140 }
141 Rule::EOI => {}
142 _ => {}
143 }
144 }
145 }
146}
147
148fn recover_items_before_grammar_failure(
149 source: &str,
150 err: &pest::error::Error<Rule>,
151 items: &mut Vec<Item>,
152 errors: &mut Vec<ParseError>,
153) {
154 let cutoff = match err.location {
155 InputLocation::Pos(pos) => pos.min(source.len()),
156 InputLocation::Span((start, _)) => start.min(source.len()),
157 };
158
159 if cutoff == 0 {
160 return;
161 }
162
163 for candidate in prefix_cutoffs(source, cutoff) {
164 if candidate == 0 {
165 continue;
166 }
167 let prefix = &source[..candidate];
168 if let Ok(pairs) = ShapeParser::parse(Rule::program, prefix) {
169 let mut doc_comment = None;
170 collect_pairs(pairs, 0, items, &mut doc_comment, errors);
171 return;
172 }
173 }
174}
175
/// Lists candidate prefix lengths of `source`, longest first.
///
/// The first candidate is `cutoff` clamped to the source length; each later
/// candidate is the byte offset of the last `'\n'` before the previous one
/// (so the prefix ends just before that newline). The list stops at the
/// first candidate that would be `0`, when no earlier newline exists, or
/// after 64 candidates.
fn prefix_cutoffs(source: &str, cutoff: usize) -> Vec<usize> {
    const MAX_CANDIDATES: usize = 64;

    let longest = cutoff.min(source.len());
    std::iter::successors(Some(longest), |&end| source[..end].rfind('\n'))
        .take_while(|&end| end > 0)
        .take(MAX_CANDIDATES)
        .collect()
}
193
194fn parse_error_from_pest(err: &pest::error::Error<Rule>, source: &str) -> ParseError {
195 let (start, end) = match err.location {
196 InputLocation::Pos(pos) => {
197 let s = pos.min(source.len());
198 (s, (s + 1).min(source.len()))
199 }
200 InputLocation::Span((start, end)) => {
201 let s = start.min(source.len());
202 let e = end.min(source.len());
203 if e > s {
204 (s, e)
205 } else {
206 (s, (s + 1).min(source.len()))
207 }
208 }
209 };
210
211 ParseError {
212 kind: ParseErrorKind::GrammarFailure,
213 message: format!("Parse error: {}", err),
214 span: (start, end),
215 }
216}
217
218fn dedup_and_sort_errors(errors: &mut Vec<ParseError>) {
219 errors.sort_by_key(|e| (e.span.0, e.span.1, e.kind));
220 errors.dedup_by(|a, b| a.kind == b.kind && a.span == b.span && a.message == b.message);
221}
222
223fn detect_malformed_from_use(source: &str) -> Vec<ParseError> {
228 let mut out = Vec::new();
229 let mut line_base = 0usize;
230
231 for line in source.lines() {
232 let trimmed = line.trim_start();
233 let indent = line.len().saturating_sub(trimmed.len());
234
235 if !trimmed.starts_with("from ") {
236 line_base += line.len() + 1;
237 continue;
238 }
239
240 let mut parts = trimmed.split_whitespace();
241 let _from = parts.next();
242 let _path = parts.next();
243 let keyword = parts.next();
244
245 let Some(found) = keyword else {
246 line_base += line.len() + 1;
247 continue;
248 };
249
250 if found == "use" || found == "in" {
252 line_base += line.len() + 1;
253 continue;
254 }
255
256 if let Some(col) = trimmed.find(found) {
257 let start = line_base + indent + col;
258 let end = start + found.len();
259 out.push(ParseError {
260 kind: ParseErrorKind::MalformedFromUse,
261 message: format!(
262 "expected keyword 'use' after module path, found '{}'",
263 found
264 ),
265 span: (start, end),
266 });
267 }
268
269 line_base += line.len() + 1;
270 }
271
272 out
273}
274
275fn detect_empty_match(source: &str) -> Vec<ParseError> {
282 let mut out = Vec::new();
283 let mut search_from = 0usize;
284
285 while let Some(rel_match) = source[search_from..].find("match") {
286 let match_start = search_from + rel_match;
287
288 let prev_ok = match_start == 0
290 || !source[..match_start]
291 .chars()
292 .next_back()
293 .is_some_and(|c| c.is_alphanumeric() || c == '_');
294 if !prev_ok {
295 search_from = match_start + "match".len();
296 continue;
297 }
298
299 let after_match = &source[match_start + "match".len()..];
300 let Some(open_rel) = after_match.find('{') else {
301 search_from = match_start + "match".len();
302 continue;
303 };
304 let open = match_start + "match".len() + open_rel;
305
306 let Some(close_rel) = source[open + 1..].find('}') else {
307 search_from = open + 1;
308 continue;
309 };
310 let close = open + 1 + close_rel;
311
312 let between = &source[open + 1..close];
313 let non_comment_content = between
314 .lines()
315 .map(|line| line.split_once("//").map(|(head, _)| head).unwrap_or(line))
316 .collect::<String>();
317
318 if non_comment_content.trim().is_empty() {
319 out.push(ParseError {
320 kind: ParseErrorKind::EmptyMatch,
321 message: "match expression requires at least one arm".to_string(),
322 span: (open, close + 1),
323 });
324 }
325
326 search_from = close + 1;
327 }
328
329 out
330}
331
#[cfg(test)]
mod tests {
    use super::*;

    /// A clean program yields every item and no errors.
    #[test]
    fn test_resilient_parse_valid_program() {
        let source = r#"
            let x = 10;
            let y = 20;
        "#;
        let result = parse_program_resilient(source);
        assert!(
            result.errors.is_empty(),
            "Expected no errors: {:?}",
            result.errors
        );
        assert_eq!(result.items.len(), 2);
        assert!(result.is_complete());
    }

    /// Garbage between two valid items is reported, and either some items
    /// survive or the failure is purely a grammar failure.
    #[test]
    fn test_resilient_parse_with_error_between_items() {
        let source = r#"let x = 10;
@@@ broken stuff here
let y = 20;"#;
        let result = parse_program_resilient(source);
        assert!(!result.errors.is_empty(), "Expected some errors");
        assert!(
            !result.items.is_empty() || result.has_only_grammar_failures(),
            "Expected partial items or explicit grammar failures, got: {:?}",
            result.errors
        );
    }

    /// A broken function in the middle of the file does not hide everything.
    #[test]
    fn test_resilient_parse_recovers_after_bad_function() {
        let source = r#"
function good() {
    return 1;
}

function bad( {
    missing params
}

let x = 42;
"#;
        let result = parse_program_resilient(source);
        assert!(!result.errors.is_empty(), "Expected parse issues");
        assert!(
            !result.items.is_empty() || result.has_only_grammar_failures(),
            "Expected partial items or grammar-failure issues, got {} items and errors: {:?}",
            result.items.len(),
            result.errors
        );
    }

    /// Empty input is a valid, empty program.
    #[test]
    fn test_resilient_parse_empty_source() {
        let result = parse_program_resilient("");
        assert!(result.items.is_empty());
        assert!(result.errors.is_empty());
    }

    /// Input with nothing parseable still produces diagnostics.
    #[test]
    fn test_resilient_parse_only_errors() {
        let source = "@@@ !!! ??? garbage";
        let result = parse_program_resilient(source);
        assert!(
            !result.errors.is_empty(),
            "Expected errors for garbage input"
        );
    }

    /// Converting a partial result keeps its items.
    #[test]
    fn test_partial_program_into_program() {
        let source = "let x = 10;";
        let result = parse_program_resilient(source);
        let program = result.into_program();
        assert_eq!(program.items.len(), 1);
    }

    /// A misspelled `use` is reported with a span covering exactly the bad token.
    #[test]
    fn test_reports_misspelled_from_use_keyword_with_token_span() {
        let source = "from std::core::snapshot duse { Snapshot }\nlet x = 1;\n";
        let result = parse_program_resilient(source);

        let specific = result
            .errors
            .iter()
            .find(|e| e.kind == ParseErrorKind::MalformedFromUse)
            .expect("expected targeted malformed import diagnostic");

        let bad = &source[specific.span.0..specific.span.1];
        assert_eq!(bad, "duse");
    }

    /// An empty `match` deep in the file must not surface as a confusing
    /// complaint about the `from` import at the top.
    #[test]
    fn test_empty_match_does_not_emit_misleading_from_identifier_error() {
        let source = r#"
from std::core::snapshot use { Snapshot }

let x = {x: 1}
let y = | x | 10*(x.x*2)
print(f"this is {y(x)}")

x.y = 1
let i = 10D

let c = "d"

fn afunc(c) {
    print("func called with " + c)
    match c {

    }
    return c
}

print(afunc(x))
"#;

        let result = parse_program_resilient(source);
        assert!(
            !result
                .errors
                .iter()
                .any(|e| e.message.contains("found identifier `from`")),
            "resilient parser produced misleading import-token error: {:?}",
            result.errors
        );
    }

    /// A typed match following a commented-out line still parses as part of
    /// its enclosing function.
    #[test]
    fn test_resilient_parse_keeps_typed_match_after_commented_line() {
        let source = r#"
from std::core::snapshot use { Snapshot }

fn afunc(c) {
    //print("func called with " + c)
    let result = match c {
        c: int => c + 1
        _ => 1
    }
    return c
    return "hi"
}
"#;

        let result = parse_program_resilient(source);
        assert!(
            result
                .items
                .iter()
                .any(|item| matches!(item, crate::ast::Item::Function(_, _))),
            "expected function item to parse, got: {:?}",
            result.items
        );
    }

    /// The empty-match diagnostic covers the braces of the `match` itself,
    /// not those of the enclosing function.
    #[test]
    fn test_detect_empty_match_reports_precise_span() {
        let source = "fn f(x) {\n match x {\n\n }\n}\n";
        let errors = detect_empty_match(source);
        let issue = errors
            .iter()
            .find(|e| e.kind == ParseErrorKind::EmptyMatch)
            .unwrap_or_else(|| panic!("expected empty match issue, got: {:?}", errors));

        let flagged = &source[issue.span.0..issue.span.1];
        assert!(
            flagged.starts_with('{') && flagged.ends_with('}'),
            "span should cover the match braces, got: {:?}",
            flagged
        );
        assert!(
            flagged[1..flagged.len() - 1].trim().is_empty(),
            "span should enclose only blank text, got: {:?}",
            flagged
        );
        assert!(
            source[..issue.span.0].trim_end().ends_with("match x"),
            "span should start at the brace that follows the match scrutinee"
        );
    }
}