Skip to main content

shape_ast/error/
pest_converter.rs

1//! Pest error to structured error conversion
2//!
3//! Converts pest's `Error<Rule>` into `StructuredParseError` for rich rendering.
4
5use pest::error::{Error as PestError, ErrorVariant, LineColLocation};
6
7use super::{
8    ErrorCode, ExpectedToken, ParseErrorKind, SourceLocation, StructuredParseError, Suggestion,
9    TextEdit, TokenCategory, TokenInfo, TokenKind, parse_error::SourceContext,
10};
11use crate::parser::Rule;
12
13/// Convert a pest error into a structured parse error
14pub fn convert_pest_error(pest_error: &PestError<Rule>, source: &str) -> StructuredParseError {
15    // Extract location
16    let location = extract_location(pest_error);
17
18    // Extract span end for range errors
19    let span_end = extract_span_end(pest_error);
20
21    // Convert the error variant to our structured kind
22    let kind = convert_variant(&pest_error.variant, source, &location);
23
24    // Build source context
25    let source_context = SourceContext::from_source(source, &location, span_end);
26
27    // Generate suggestions based on error kind
28    let suggestions = generate_suggestions(&kind, source, &location);
29
30    // Determine error code
31    let code = determine_error_code(&kind);
32
33    StructuredParseError::new(kind, location)
34        .with_source_context(source_context)
35        .with_suggestions(suggestions)
36        .with_code(code)
37}
38
39fn extract_location(error: &PestError<Rule>) -> SourceLocation {
40    match &error.line_col {
41        LineColLocation::Pos((line, col)) => SourceLocation::new(*line, *col),
42        LineColLocation::Span((start_line, start_col), _) => {
43            SourceLocation::new(*start_line, *start_col)
44        }
45    }
46}
47
48fn extract_span_end(error: &PestError<Rule>) -> Option<(usize, usize)> {
49    match &error.line_col {
50        LineColLocation::Span(_, (end_line, end_col)) => Some((*end_line, *end_col)),
51        LineColLocation::Pos(_) => None,
52    }
53}
54
55fn convert_variant(
56    variant: &ErrorVariant<Rule>,
57    source: &str,
58    location: &SourceLocation,
59) -> ParseErrorKind {
60    match variant {
61        ErrorVariant::ParsingError {
62            positives,
63            negatives: _,
64        } => {
65            // Convert pest's positives (expected rules) to our expected tokens
66            let expected: Vec<ExpectedToken> = positives
67                .iter()
68                .filter_map(rule_to_expected_token)
69                .collect();
70
71            // Get the actual token at this position
72            let found = extract_found_token(source, location);
73
74            // Check if we're at end of input
75            if matches!(found.kind, Some(TokenKind::EndOfInput)) {
76                ParseErrorKind::UnexpectedEof { expected }
77            } else {
78                ParseErrorKind::UnexpectedToken { found, expected }
79            }
80        }
81        ErrorVariant::CustomError { message } => {
82            // Try to parse semantic meaning from custom errors
83            parse_custom_error(message, location)
84        }
85    }
86}
87
88/// Convert a pest Rule to an ExpectedToken
89fn rule_to_expected_token(rule: &Rule) -> Option<ExpectedToken> {
90    // Map rules to user-friendly expectations
91    match rule {
92        Rule::ident => Some(ExpectedToken::Category(TokenCategory::Identifier)),
93        Rule::expression | Rule::primary_expr | Rule::postfix_expr => {
94            Some(ExpectedToken::Category(TokenCategory::Expression))
95        }
96        Rule::statement => Some(ExpectedToken::Category(TokenCategory::Statement)),
97        Rule::number | Rule::integer => Some(ExpectedToken::Category(TokenCategory::Literal)),
98        Rule::string => Some(ExpectedToken::Rule("string".to_string())),
99        Rule::function_def => Some(ExpectedToken::Rule("function_def".to_string())),
100        Rule::variable_decl => Some(ExpectedToken::Rule("variable_decl".to_string())),
101        Rule::type_annotation => Some(ExpectedToken::Rule("type_annotation".to_string())),
102        Rule::if_stmt | Rule::if_expr => Some(ExpectedToken::Rule("if_stmt".to_string())),
103        Rule::for_loop | Rule::for_expr => Some(ExpectedToken::Rule("for_loop".to_string())),
104        Rule::while_loop | Rule::while_expr => Some(ExpectedToken::Rule("while_loop".to_string())),
105        Rule::return_stmt => Some(ExpectedToken::Rule("return_stmt".to_string())),
106        Rule::query => Some(ExpectedToken::Rule("query".to_string())),
107        Rule::import_stmt => Some(ExpectedToken::Rule("import_stmt".to_string())),
108        Rule::pub_item => Some(ExpectedToken::Rule("pub_item".to_string())),
109        Rule::array_literal => Some(ExpectedToken::Rule("array_literal".to_string())),
110        Rule::object_literal => Some(ExpectedToken::Rule("object_literal".to_string())),
111        Rule::match_expr => Some(ExpectedToken::Rule("match_expr".to_string())),
112        Rule::match_arm => Some(ExpectedToken::Rule("match_arm".to_string())),
113        Rule::block_expr => Some(ExpectedToken::Rule("block_expr".to_string())),
114        Rule::function_body => Some(ExpectedToken::Rule("function_body".to_string())),
115        Rule::function_params => Some(ExpectedToken::Rule("function_params".to_string())),
116        Rule::pattern => Some(ExpectedToken::Category(TokenCategory::Pattern)),
117        Rule::primary_type | Rule::basic_type | Rule::generic_type => {
118            Some(ExpectedToken::Category(TokenCategory::Type))
119        }
120        Rule::join_kind => Some(ExpectedToken::Rule("join_kind".to_string())),
121        Rule::comptime_annotation_handler_phase => Some(ExpectedToken::Rule(
122            "comptime_annotation_handler_phase".to_string(),
123        )),
124        Rule::annotation_handler_kind => {
125            Some(ExpectedToken::Rule("annotation_handler_kind".to_string()))
126        }
127        Rule::stream_def => Some(ExpectedToken::Rule("stream_def".to_string())),
128        Rule::enum_def => Some(ExpectedToken::Rule("enum_def".to_string())),
129        Rule::struct_type_def => Some(ExpectedToken::Rule("struct_type_def".to_string())),
130        Rule::trait_def => Some(ExpectedToken::Rule("trait_def".to_string())),
131        Rule::impl_block => Some(ExpectedToken::Rule("impl_block".to_string())),
132        Rule::return_type => Some(ExpectedToken::Rule("return_type".to_string())),
133
134        // Internal rules we don't want to show
135        Rule::EOI | Rule::WHITESPACE | Rule::COMMENT => None,
136        Rule::program | Rule::item => None,
137
138        // For unknown rules, return None to filter them out
139        _ => None,
140    }
141}
142
143fn extract_found_token(source: &str, location: &SourceLocation) -> TokenInfo {
144    let lines: Vec<&str> = source.lines().collect();
145    if location.line == 0 || location.line > lines.len() {
146        return TokenInfo::end_of_input();
147    }
148
149    let line = lines[location.line - 1];
150    if location.column == 0 {
151        return TokenInfo::new("").with_kind(TokenKind::Unknown);
152    }
153
154    // Convert char-based column to byte offset (Pest columns count characters, not bytes)
155    let col0 = location.column - 1;
156    let byte_offset = line
157        .char_indices()
158        .nth(col0)
159        .map(|(i, _)| i);
160
161    let Some(byte_offset) = byte_offset else {
162        // Column is past the end of the line
163        if location.line >= lines.len() {
164            return TokenInfo::end_of_input();
165        }
166        return TokenInfo::new("").with_kind(TokenKind::Unknown);
167    };
168
169    // Extract a token starting at the position
170    let rest = &line[byte_offset..];
171    let token_text = extract_token_text(rest);
172    let kind = classify_token(&token_text);
173
174    TokenInfo::new(token_text).with_kind(kind)
175}
176
/// Extract the text of the token that starts at the beginning of `s`.
///
/// * An identifier/keyword: an alphabetic character or `_`, followed by
///   alphanumerics and `_`.
/// * A number: a numeric character followed by numerics, `.`, `e` or `E`.
/// * A string literal: `"`, `'` or `` ` `` up to the matching closing delimiter
///   on the same text, honouring backslash escapes. If no closing delimiter is
///   found, only the opening delimiter is returned.
/// * Anything else: the single leading character.
///
/// Returns an empty string when `s` is empty.
fn extract_token_text(s: &str) -> String {
    let Some(first) = s.chars().next() else {
        return String::new();
    };

    // Identifier or keyword. The first character is already known to be
    // alphabetic or `_`, both of which satisfy the continuation predicate.
    if first.is_alphabetic() || first == '_' {
        return s
            .chars()
            .take_while(|c| c.is_alphanumeric() || *c == '_')
            .collect();
    }

    // Number
    if first.is_numeric() {
        return s
            .chars()
            .take_while(|c| c.is_numeric() || matches!(c, '.' | 'e' | 'E'))
            .collect();
    }

    // String literal: return the whole literal so that messages quoting the
    // found token show `"John"` rather than a lone quote character.
    if matches!(first, '"' | '\'' | '`') {
        let mut escaped = false;
        for (index, c) in s.char_indices().skip(1) {
            if escaped {
                escaped = false;
            } else if c == '\\' {
                escaped = true;
            } else if c == first {
                return s[..index + c.len_utf8()].to_string();
            }
        }
        // Unterminated: fall through and report just the opening delimiter.
    }

    // Single character token
    first.to_string()
}
213
214fn classify_token(text: &str) -> TokenKind {
215    // Check for keywords
216    const KEYWORDS: &[&str] = &[
217        "let", "var", "const", "function", "return", "if", "else", "for", "while", "break",
218        "continue", "pattern", "query", "true", "false", "null", "import", "module", "extend",
219        "method", "stream", "find", "scan", "analyze", "on", "and", "or",
220    ];
221
222    if KEYWORDS.contains(&text) {
223        return TokenKind::Keyword(text.to_string());
224    }
225
226    if text
227        .chars()
228        .next()
229        .is_some_and(|c| c.is_alphabetic() || c == '_')
230    {
231        return TokenKind::Identifier;
232    }
233
234    if text.chars().next().is_some_and(|c| c.is_numeric()) {
235        return TokenKind::Number;
236    }
237
238    if text.starts_with('"') || text.starts_with('\'') || text.starts_with('`') {
239        return TokenKind::String;
240    }
241
242    TokenKind::Punctuation
243}
244
245fn parse_custom_error(message: &str, _location: &SourceLocation) -> ParseErrorKind {
246    // Try to recognize common patterns in custom error messages
247    let msg_lower = message.to_lowercase();
248
249    if msg_lower.contains("unterminated") && msg_lower.contains("string") {
250        return ParseErrorKind::UnterminatedString {
251            start_location: SourceLocation::new(0, 0), // Would need more context
252            delimiter: super::StringDelimiter::DoubleQuote,
253        };
254    }
255
256    if msg_lower.contains("unterminated") && msg_lower.contains("comment") {
257        return ParseErrorKind::UnterminatedComment {
258            start_location: SourceLocation::new(0, 0),
259        };
260    }
261
262    ParseErrorKind::Custom {
263        message: message.to_string(),
264    }
265}
266
267fn generate_suggestions(
268    kind: &ParseErrorKind,
269    source: &str,
270    location: &SourceLocation,
271) -> Vec<Suggestion> {
272    let mut suggestions = Vec::new();
273
274    match kind {
275        ParseErrorKind::UnexpectedToken { found, expected } => {
276            // Check for common typos using Levenshtein distance
277            if let Some(TokenKind::Identifier) = &found.kind {
278                // Check if the identifier might be a typo of a keyword
279                for keyword in &["function", "return", "pattern", "import"] {
280                    if levenshtein_distance(&found.text, keyword) <= 2 {
281                        suggestions.push(
282                            Suggestion::likely(format!("did you mean `{}`?", keyword)).with_edit(
283                                TextEdit::replace(
284                                    (location.line, location.column),
285                                    (location.line, location.column + found.text.len()),
286                                    keyword.to_string(),
287                                ),
288                            ),
289                        );
290                        break;
291                    }
292                }
293            }
294
295            // Suggest missing semicolon
296            if expected
297                .iter()
298                .any(|e| matches!(e, ExpectedToken::Literal(s) if s == ";"))
299            {
300                suggestions.push(
301                    Suggestion::likely("try adding a semicolon here").with_edit(TextEdit::insert(
302                        location.line,
303                        location.column,
304                        ";",
305                    )),
306                );
307            }
308
309            // Suggest missing closing delimiter
310            for delim in &[")", "]", "}"] {
311                if expected
312                    .iter()
313                    .any(|e| matches!(e, ExpectedToken::Literal(s) if s == *delim))
314                {
315                    suggestions.push(Suggestion::likely(format!(
316                        "you may be missing a `{}`",
317                        delim
318                    )));
319                    break;
320                }
321            }
322
323            // Suggest missing `=>` in match arms
324            if expected
325                .iter()
326                .any(|e| matches!(e, ExpectedToken::Rule(s) if s == "match_arm"))
327            {
328                suggestions.push(Suggestion::likely(
329                    "match arms require `=>` after the pattern, e.g. `pattern => expression`",
330                ));
331            }
332
333            // Suggest `pre` or `post` for comptime handler phase (BUG-15)
334            if expected.iter().any(
335                |e| matches!(e, ExpectedToken::Rule(s) if s == "comptime_annotation_handler_phase"),
336            ) {
337                suggestions.push(Suggestion::likely(
338                    "use `comptime pre(...)` or `comptime post(...)` to specify the handler phase",
339                ));
340            }
341
342            // Suggest valid join strategies
343            if expected
344                .iter()
345                .any(|e| matches!(e, ExpectedToken::Rule(s) if s == "join_kind"))
346            {
347                suggestions.push(Suggestion::likely(
348                    "expected a join strategy: `all`, `race`, `any`, or `settle`",
349                ));
350            }
351
352            if let Some(suggestion) =
353                struct_literal_named_field_suggestion(source, location, found, expected)
354            {
355                suggestions.push(suggestion);
356            }
357        }
358
359        ParseErrorKind::UnexpectedEof { expected } => {
360            if !expected.is_empty() {
361                let needs_brace = expected
362                    .iter()
363                    .any(|e| matches!(e, ExpectedToken::Literal(s) if s == "}"));
364                let needs_body = expected.iter().any(|e| {
365                    matches!(e, ExpectedToken::Rule(s) if s == "function_body" || s == "block_expr")
366                });
367
368                if needs_brace || needs_body {
369                    suggestions.push(Suggestion::likely(
370                        "you may have an unclosed block - check for missing `}`",
371                    ));
372                } else {
373                    suggestions.push(Suggestion::new(
374                        "the file ended unexpectedly - check for unclosed delimiters",
375                    ));
376                }
377            }
378
379            // If no expected tokens, check source for unclosed delimiters
380            if expected.is_empty() {
381                let open_braces = source.chars().filter(|c| *c == '{').count();
382                let close_braces = source.chars().filter(|c| *c == '}').count();
383                if open_braces > close_braces {
384                    suggestions.push(Suggestion::likely(
385                        "you may have an unclosed block - check for missing `}`",
386                    ));
387                }
388            }
389
390            // Suggest `pre` or `post` for comptime handler phase at EOF (BUG-15)
391            if expected.iter().any(
392                |e| matches!(e, ExpectedToken::Rule(s) if s == "comptime_annotation_handler_phase"),
393            ) {
394                suggestions.push(Suggestion::likely(
395                    "use `comptime pre(...)` or `comptime post(...)` to specify the handler phase",
396                ));
397            }
398        }
399
400        ParseErrorKind::UnterminatedString { delimiter, .. } => {
401            let close_char = match delimiter {
402                super::StringDelimiter::DoubleQuote => '"',
403                super::StringDelimiter::SingleQuote => '\'',
404                super::StringDelimiter::Backtick => '`',
405            };
406            suggestions.push(Suggestion::certain(format!(
407                "add closing `{}` to terminate the string",
408                close_char
409            )));
410        }
411
412        ParseErrorKind::UnbalancedDelimiter { opener, .. } => {
413            let closer = super::parse_error::matching_close(*opener);
414            suggestions.push(Suggestion::certain(format!(
415                "add `{}` to close the `{}`",
416                closer, opener
417            )));
418        }
419
420        ParseErrorKind::ReservedKeyword { keyword, .. } => {
421            suggestions.push(Suggestion::new(format!(
422                "try using a different name, such as `{}_value` or `my_{}`",
423                keyword, keyword
424            )));
425        }
426
427        ParseErrorKind::InvalidEscape {
428            sequence: _,
429            valid_escapes,
430        } => {
431            if !valid_escapes.is_empty() {
432                suggestions.push(Suggestion::certain(format!(
433                    "valid escape sequences are: {}",
434                    valid_escapes.join(", ")
435                )));
436            }
437        }
438
439        _ => {}
440    }
441
442    suggestions
443}
444
445fn struct_literal_named_field_suggestion(
446    source: &str,
447    location: &SourceLocation,
448    found: &TokenInfo,
449    _expected: &[ExpectedToken],
450) -> Option<Suggestion> {
451    if !matches!(found.kind, Some(TokenKind::String)) {
452        return None;
453    }
454
455    let offset = line_col_to_offset(source, location.line, location.column)?;
456    let prefix = &source[..offset.min(source.len())];
457    let trimmed_len = prefix.trim_end_matches(char::is_whitespace).len();
458    if trimmed_len == 0 {
459        return None;
460    }
461
462    let bytes = prefix.as_bytes();
463    let prev = bytes[trimmed_len - 1] as char;
464    if prev != '{' && prev != ',' {
465        return None;
466    }
467
468    if prev == '{' {
469        // Try to recover `TypeName` from `TypeName { "..." }` for a concrete hint.
470        let mut end = trimmed_len - 1;
471        while end > 0 && bytes[end - 1].is_ascii_whitespace() {
472            end -= 1;
473        }
474        let mut start = end;
475        while start > 0 {
476            let c = bytes[start - 1] as char;
477            if c.is_ascii_alphanumeric() || c == '_' {
478                start -= 1;
479            } else {
480                break;
481            }
482        }
483        if start < end {
484            let ty_name = &prefix[start..end];
485            if ty_name
486                .chars()
487                .next()
488                .map(|c| c.is_ascii_uppercase())
489                .unwrap_or(false)
490            {
491                return Some(Suggestion::likely(format!(
492                    "struct literals require named fields, e.g. `{} {{ name: {} }}`",
493                    ty_name, found.text
494                )));
495            }
496        }
497    }
498
499    Some(Suggestion::likely(
500        "struct literals require named fields: `TypeName { field: value }`",
501    ))
502}
503
/// Convert a 1-based line and character column into a byte offset in `source`.
///
/// Lines are separated by `\n`. Returns `None` when `line` or `column` is zero
/// or when `source` has fewer than `line` lines. A column beyond the end of the
/// line is clamped to the end of that line.
fn line_col_to_offset(source: &str, line: usize, column: usize) -> Option<usize> {
    if line == 0 || column == 0 {
        return None;
    }

    // Walk the lines once, accumulating the byte offset at which each starts.
    let mut line_start = 0usize;
    let mut target_line = None;
    for (index, text) in source.split('\n').enumerate() {
        if index + 1 == line {
            target_line = Some(text);
            break;
        }
        // +1 accounts for the `\n` that terminated this line.
        line_start = line_start.saturating_add(text.len() + 1);
    }
    let line_text = target_line?;

    // Columns count characters, so map the column to a byte index in the line.
    let within_line = line_text
        .char_indices()
        .nth(column - 1)
        .map_or(line_text.len(), |(byte_index, _)| byte_index);

    Some(line_start.saturating_add(within_line))
}
529
530fn determine_error_code(kind: &ParseErrorKind) -> ErrorCode {
531    match kind {
532        ParseErrorKind::UnexpectedToken { .. } => ErrorCode::E0001,
533        ParseErrorKind::UnexpectedEof { .. } => ErrorCode::E0001,
534        ParseErrorKind::UnterminatedString { .. } => ErrorCode::E0002,
535        ParseErrorKind::UnterminatedComment { .. } => ErrorCode::E0002,
536        ParseErrorKind::InvalidNumber { .. } => ErrorCode::E0003,
537        ParseErrorKind::MissingComponent {
538            component: super::MissingComponentKind::Semicolon,
539            ..
540        } => ErrorCode::E0004,
541        ParseErrorKind::UnbalancedDelimiter { .. } => ErrorCode::E0005,
542        _ => ErrorCode::E0001, // Default to unexpected token
543    }
544}
545
/// Levenshtein edit distance between `a` and `b`, measured in characters.
///
/// Counts the minimum number of single-character insertions, deletions and
/// substitutions needed to turn one string into the other.
fn levenshtein_distance(a: &str, b: &str) -> usize {
    let target: Vec<char> = b.chars().collect();

    if a.is_empty() {
        return target.len();
    }
    if target.is_empty() {
        return a.chars().count();
    }

    // A single row is updated in place: before column `j + 1` is overwritten
    // it still holds the value from the previous row, and `diagonal` carries
    // the previous row's value for column `j`.
    let mut row: Vec<usize> = (0..=target.len()).collect();

    for (i, source_char) in a.chars().enumerate() {
        let mut diagonal = row[0];
        row[0] = i + 1;

        for (j, target_char) in target.iter().enumerate() {
            let above = row[j + 1];
            let substitution = diagonal + usize::from(source_char != *target_char);
            let deletion = above + 1;
            let insertion = row[j] + 1;

            row[j + 1] = deletion.min(insertion).min(substitution);
            diagonal = above;
        }
    }

    row[target.len()]
}
576
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::{Rule, ShapeParser};
    use pest::Parser;

    /// Parses `source` with `rule`, which must fail, and converts the error.
    fn convert_failure(rule: Rule, source: &str) -> StructuredParseError {
        let pest_err = ShapeParser::parse(rule, source).expect_err("expected parse error");
        convert_pest_error(&pest_err, source)
    }

    /// Collects the suggestion messages attached to a structured error.
    fn suggestion_messages(error: &StructuredParseError) -> Vec<&str> {
        error
            .suggestions
            .iter()
            .map(|s| s.message.as_str())
            .collect()
    }

    /// Asserts that `rule` is surfaced to the user as an expected token.
    fn assert_rule_is_reported(rule: Rule, name: &str) {
        assert!(
            rule_to_expected_token(&rule).is_some(),
            "{} should produce an ExpectedToken",
            name
        );
    }

    #[test]
    fn suggests_named_fields_for_positional_struct_literal_value() {
        let structured = convert_failure(Rule::struct_literal, r#"User {"John"}"#);
        let messages = suggestion_messages(&structured);
        assert!(
            messages
                .iter()
                .any(|m| m.contains("struct literals require named fields")),
            "expected named-field struct literal hint, got: {:?}",
            messages
        );
    }

    #[test]
    fn test_classify_keyword() {
        for word in &["function", "return"] {
            assert!(matches!(classify_token(word), TokenKind::Keyword(_)));
        }
    }

    #[test]
    fn test_classify_identifier() {
        for word in &["foo", "myVar", "_private"] {
            assert!(matches!(classify_token(word), TokenKind::Identifier));
        }
    }

    #[test]
    fn test_classify_number() {
        for word in &["42", "3.14"] {
            assert!(matches!(classify_token(word), TokenKind::Number));
        }
    }

    #[test]
    fn test_extract_token_text() {
        let cases = [("foo + bar", "foo"), ("123.45", "123.45"), (")", ")"), ("", "")];
        for (input, expected) in &cases {
            assert_eq!(extract_token_text(input), *expected);
        }
    }

    #[test]
    fn test_levenshtein_distance() {
        let cases = [
            ("function", "function", 0),
            ("fucntion", "function", 2),
            ("funciton", "function", 2),
            ("", "abc", 3),
            ("abc", "", 3),
        ];
        for (a, b, expected) in &cases {
            assert_eq!(levenshtein_distance(a, b), *expected);
        }
    }

    // BUG-9: Generic parse error quality tests

    #[test]
    fn match_arm_missing_fat_arrow_produces_helpful_error() {
        let msg = convert_failure(Rule::expression, "match x { 1 2 }").to_string();
        assert!(
            !msg.contains("expected something else"),
            "error should be specific, got: {}",
            msg
        );
    }

    #[test]
    fn missing_function_body_produces_helpful_error() {
        let msg = convert_failure(Rule::function_def, "function foo()").to_string();
        assert!(
            !msg.contains("expected something else"),
            "error should mention function body, got: {}",
            msg
        );
    }

    #[test]
    fn missing_closing_brace_produces_helpful_suggestion() {
        let structured = convert_failure(Rule::block_expr, "{ let x = 1;");
        let msg = structured.to_string();
        let messages = suggestion_messages(&structured);

        // The hint may live in the rendered message or in any suggestion.
        let mentions_brace = |text: &str| text.contains("`}`") || text.contains("unclosed");
        let has_brace_hint = mentions_brace(&msg) || messages.iter().any(|m| mentions_brace(*m));

        assert!(
            has_brace_hint,
            "expected closing brace hint, got message: '{}', suggestions: {:?}",
            msg,
            messages
        );
    }

    #[test]
    fn rule_to_expected_token_covers_match_arm() {
        assert_rule_is_reported(Rule::match_arm, "match_arm");
    }

    #[test]
    fn rule_to_expected_token_covers_match_expr() {
        assert_rule_is_reported(Rule::match_expr, "match_expr");
    }

    #[test]
    fn rule_to_expected_token_covers_block_expr() {
        assert_rule_is_reported(Rule::block_expr, "block_expr");
    }

    #[test]
    fn rule_to_expected_token_covers_function_body() {
        assert_rule_is_reported(Rule::function_body, "function_body");
    }

    #[test]
    fn rule_to_expected_token_covers_function_params() {
        assert_rule_is_reported(Rule::function_params, "function_params");
    }

    #[test]
    fn rule_to_expected_token_covers_pattern() {
        assert_rule_is_reported(Rule::pattern, "pattern");
    }

    // BUG-15: Comptime error quality tests

    #[test]
    fn rule_to_expected_token_covers_comptime_handler_phase() {
        assert_rule_is_reported(
            Rule::comptime_annotation_handler_phase,
            "comptime_annotation_handler_phase",
        );
    }

    #[test]
    fn comptime_invalid_phase_produces_suggestion() {
        let structured = convert_failure(Rule::annotation_handler_kind, "comptime target");
        let messages = suggestion_messages(&structured);
        assert!(
            messages
                .iter()
                .any(|m| m.contains("pre") && m.contains("post")),
            "expected comptime pre/post suggestion, got suggestions: {:?}",
            messages
        );
    }

    #[test]
    fn test_extract_found_token_with_multibyte_utf8() {
        // The em-dash is 3 bytes in UTF-8; slicing by character column used to
        // panic with "byte index N is not a char boundary".
        let source = "// comment — rest\nlet x = 1";
        // Character column 14 lies past the em-dash and the space after it.
        let loc = SourceLocation::new(1, 14);
        let token = extract_found_token(source, &loc);
        // Must not panic, and should yield some text or an unknown token.
        assert!(!token.text.is_empty() || token.kind == Some(TokenKind::Unknown));
    }

    #[test]
    fn test_extract_found_token_multibyte_at_error_position() {
        // The parse error position falls on a multi-byte character; the
        // conversion must not panic.
        let structured = convert_failure(Rule::program, "let — = 1");
        // The kind should be a real classification, not a missing component.
        assert!(!matches!(
            structured.kind,
            ParseErrorKind::MissingComponent { .. }
        ));
    }
}