// ghostscope_compiler/script/parser.rs

1use pest::iterators::{Pair, Pairs};
2use pest::Parser;
3use pest::RuleType;
4use pest_derive::Parser;
5
6use crate::script::ast::{
7    infer_type, BinaryOp, Expr, PrintStatement, Program, Statement, TracePattern,
8};
9use crate::script::format_validator::FormatValidator;
10use tracing::{debug, info};
11
/// Pest parser for scripts; the `Parser` derive generates the implementation
/// and the `Rule` enum from `script/grammar.pest`.
#[derive(Parser)]
#[grammar = "script/grammar.pest"]
pub struct GhostScopeParser;
15
/// Errors reported while turning script source text into an AST.
#[derive(Debug, thiserror::Error)]
pub enum ParseError {
    /// Raw pest grammar failure, boxed.
    #[error("Pest parser error: {0}")]
    Pest(#[from] Box<pest::error::Error<Rule>>),

    /// A parse-tree node carried a rule the AST builder did not expect at that position.
    #[error("Unexpected token: {0:?}")]
    UnexpectedToken(Rule),

    /// An expected child node was missing from the parse tree.
    #[error("Invalid expression")]
    InvalidExpression,

    /// Human-readable syntax diagnostic (heuristic hints, nested `trace`, ...).
    #[error("Syntax error: {0}")]
    SyntaxError(String),

    /// Type-level rejection: `infer_type` failures, invalid or unsupported literals,
    /// and attempted assignment to an immutable variable.
    #[error("Type error: {0}")]
    TypeError(String),

    /// NOTE(review): not constructed in the visible part of this file; presumably
    /// reserved for recognised-but-unimplemented constructs — confirm against callers.
    #[error("Unsupported feature: {0}")]
    UnsupportedFeature(String),
}
36
37impl From<pest::error::Error<Rule>> for ParseError {
38    fn from(err: pest::error::Error<Rule>) -> Self {
39        ParseError::Pest(Box::new(err))
40    }
41}
42
/// Result alias used throughout the parser; the error type is always [`ParseError`].
pub type Result<T> = std::result::Result<T, ParseError>;
44
45// Custom chunks function with RuleType constraint
46fn chunks_of_two<'a, T: RuleType>(pairs: Pairs<'a, T>) -> Vec<Vec<Pair<'a, T>>> {
47    let pairs_vec: Vec<_> = pairs.collect();
48    let mut result = Vec::new();
49
50    let mut i = 0;
51    // Only produce full (op, rhs) pairs; ignore any trailing leftover defensively
52    while i + 1 < pairs_vec.len() {
53        result.push(vec![pairs_vec[i].clone(), pairs_vec[i + 1].clone()]);
54        i += 2;
55    }
56
57    result
58}
59
60pub fn parse(input: &str) -> Result<Program> {
61    debug!("Starting to parse input: {}", input.trim());
62
63    let pairs = match GhostScopeParser::parse(Rule::program, input) {
64        Ok(p) => p,
65        Err(e) => {
66            // Heuristic: detect unclosed string in print lines to provide a clearer hint
67            if let Some(msg) = detect_unclosed_print_string(input) {
68                return Err(ParseError::SyntaxError(msg));
69            }
70            // Heuristic: detect likely misspelled or unknown keywords and suggest fixes
71            if let Some(msg) = detect_unknown_keyword(input) {
72                return Err(ParseError::SyntaxError(msg));
73            }
74            return Err(ParseError::Pest(Box::new(e)));
75        }
76    };
77    let mut program = Program::new();
78
79    for pair in pairs {
80        debug!(
81            "Parsing top-level rule: {:?} = '{}'",
82            pair.as_rule(),
83            pair.as_str().trim()
84        );
85        match pair.as_rule() {
86            Rule::statement => {
87                let statement = parse_statement(pair)?;
88                program.add_statement(statement);
89            }
90            Rule::EOI => {}
91            _ => return Err(ParseError::UnexpectedToken(pair.as_rule())),
92        }
93    }
94
95    debug!("Parsing completed successfully");
96    Ok(program)
97}
98
/// Best-effort heuristic that looks for a `print` line with an unterminated
/// string literal and returns a friendlier message for the first one found.
///
/// A line is considered when, after stripping leading whitespace, it starts
/// with `print` or contains `print ` anywhere. The string is deemed unclosed
/// when the line holds an odd number of `"` characters; escaped quotes are not
/// treated specially. Returns `None` when no such line exists.
fn detect_unclosed_print_string(input: &str) -> Option<String> {
    input.lines().enumerate().find_map(|(idx, raw)| {
        let text = raw.trim_start();
        let mentions_print = text.starts_with("print") || text.contains("print ");
        if !mentions_print {
            return None;
        }

        // An odd number of quotes means the last literal was never closed.
        let quote_count = text.chars().filter(|&c| c == '"').count();
        if quote_count % 2 == 0 {
            return None;
        }

        let line_no = idx + 1;
        let message = if text.contains(',') {
            // Common case: the closing quote is missing before the argument list.
            format!(
                "Unclosed string literal in print at line {}. Did you forget a closing \"\" before ',' and arguments?",
                line_no
            )
        } else {
            format!("Unclosed string literal in print at line {}.", line_no)
        };
        Some(message)
    })
}
131
/// Best-effort heuristic that looks for a statement starting with an
/// unknown or misspelled keyword and returns a diagnostic for the first hit.
///
/// Each line is probed at its start and right after every `{`, `;`, `}`, `(`
/// and `,` that is outside a string literal. Scanning of a line stops at a
/// `//` comment marker (outside strings) so that prose inside comments is
/// never reported as a keyword. When the offending word is within edit
/// distance 2 of a known keyword (or builtin, for call-like heads) the message
/// suggests it; otherwise the supported keywords are listed.
///
/// Returns `None` when nothing suspicious is found.
fn detect_unknown_keyword(input: &str) -> Option<String> {
    // Suggest only currently supported top-level keywords.
    const SUGGEST: &[&str] = &["trace", "print", "if", "else", "let"];
    // Valid statement starters that should not be flagged as unknown
    const SUPPORTED_HEADS: &[&str] = &["trace", "print", "if", "else", "let", "backtrace", "bt"];
    // Builtin call names allowed at expression head
    const BUILTIN_CALLS: &[&str] = &["memcmp", "strncmp", "starts_with", "hex"];

    // Helper: simple Levenshtein distance (small strings, few keywords).
    // Dimensions come from the char vectors, not `str::len` (bytes), so the
    // table and the indexed characters always agree, even for non-ASCII text.
    fn levenshtein(a: &str, b: &str) -> usize {
        let ac: Vec<char> = a.chars().collect();
        let bc: Vec<char> = b.chars().collect();
        let (n, m) = (ac.len(), bc.len());
        let mut dp = vec![0usize; (n + 1) * (m + 1)];
        let idx = |i: usize, j: usize| i * (m + 1) + j;
        for i in 0..=n {
            dp[idx(i, 0)] = i;
        }
        for j in 0..=m {
            dp[idx(0, j)] = j;
        }
        for i in 1..=n {
            for j in 1..=m {
                let cost = if ac[i - 1] == bc[j - 1] { 0 } else { 1 };
                let del = dp[idx(i - 1, j)] + 1;
                let ins = dp[idx(i, j - 1)] + 1;
                let sub = dp[idx(i - 1, j - 1)] + cost;
                dp[idx(i, j)] = del.min(ins).min(sub);
            }
        }
        dp[idx(n, m)]
    }

    // Helper: check a slice for a command-like unknown keyword
    fn check_slice(slice: &str, line_no_1based: usize) -> Option<String> {
        let mut s = slice.trim_start();
        if s.is_empty() || s.starts_with("//") {
            return None;
        }

        // If this slice begins with an if/else-if header, jump inside the condition.
        // A bare 'else { ... }' has nothing to inspect and is accepted below as a
        // supported head.
        if let Some(rest) = s.strip_prefix("if") {
            if rest.starts_with(char::is_whitespace) {
                s = rest.trim_start();
            }
        } else if let Some(rest) = s.strip_prefix("else") {
            if let Some(rest2) = rest.trim_start().strip_prefix("if") {
                if rest2.starts_with(char::is_whitespace) {
                    s = rest2.trim_start();
                }
            }
        }

        // Keywords must start with a letter or underscore; skip numeric heads
        let first = s.chars().next()?;
        if !(first.is_ascii_alphabetic() || first == '_') {
            return None;
        }
        // The token is the leading run of ASCII identifier characters, so byte
        // offsets and char counts coincide for it.
        let token_end = s
            .find(|c: char| !(c.is_ascii_alphanumeric() || c == '_'))
            .unwrap_or(s.len());
        let token = &s[..token_end];
        if SUPPORTED_HEADS.contains(&token) {
            return None;
        }

        let rest_untrimmed = &s[token_end..];
        let rest = rest_untrimmed.trim_start();
        if rest.starts_with('=') || rest.starts_with('[') || rest.starts_with('.') {
            // likely an expression starting with identifier
            return None;
        }
        let looks_like_call = rest.starts_with('(');
        // Allow builtin calls as expression statements
        if looks_like_call && BUILTIN_CALLS.contains(&token) {
            return None;
        }

        let command_like = looks_like_call
            || rest.starts_with('{')
            || rest.starts_with('"')
            || rest_untrimmed.starts_with(char::is_whitespace);
        if !command_like {
            return None;
        }

        // If it looks like a call (token + '('), include builtin calls in suggestion candidates
        let mut candidates: Vec<&str> = SUGGEST.to_vec();
        if looks_like_call {
            candidates.extend_from_slice(BUILTIN_CALLS);
        }
        // `min_by_key` returns the first of several equal minima, so earlier
        // candidates win ties.
        let best = candidates
            .iter()
            .map(|&k| (k, levenshtein(token, k)))
            .min_by_key(|&(_, d)| d);
        if let Some((cand, dist)) = best {
            if dist <= 2 {
                return Some(format!(
                    "Unknown keyword '{token}' at line {line_no_1based}. Did you mean '{cand}'?"
                ));
            }
        }
        Some(format!(
            "Unknown keyword '{token}' at line {}. Expected one of: {}",
            line_no_1based,
            SUGGEST.join(", ")
        ))
    }

    for (i, line) in input.lines().enumerate() {
        // Scan potential statement starts: at line start, and right after
        // '{', ';', '}', '(', ',' (outside strings)
        let mut quote_open = false;
        let mut positions: Vec<usize> = vec![0]; // include start-of-line
        for (idx, ch) in line.char_indices() {
            if ch == '"' {
                quote_open = !quote_open;
                continue;
            }
            if quote_open {
                continue;
            }
            // Everything after a line-comment marker is prose, not code.
            if ch == '/' && line[idx..].starts_with("//") {
                break;
            }
            if matches!(ch, '{' | ';' | '}' | '(' | ',') {
                let next = idx + ch.len_utf8();
                if next < line.len() {
                    positions.push(next);
                }
            }
        }
        let hit = positions
            .iter()
            .find_map(|&pos| check_slice(&line[pos..], i + 1));
        if hit.is_some() {
            return hit;
        }
    }
    None
}
278
279fn parse_statement(pair: Pair<Rule>) -> Result<Statement> {
280    debug!(
281        "parse_statement: {:?} = '{}'",
282        pair.as_rule(),
283        pair.as_str().trim()
284    );
285    let inner = pair
286        .into_inner()
287        .next()
288        .ok_or(ParseError::InvalidExpression)?;
289    debug!(
290        "parse_statement inner: {:?} = '{}'",
291        inner.as_rule(),
292        inner.as_str().trim()
293    );
294
295    match inner.as_rule() {
296        Rule::trace_stmt => {
297            let mut inner_pairs = inner.into_inner();
298            let pattern_pair = inner_pairs.next().ok_or(ParseError::InvalidExpression)?;
299            let pattern = parse_trace_pattern(pattern_pair)?;
300
301            let mut body = Vec::new();
302            for stmt_pair in inner_pairs {
303                // Disallow nested trace statements (trace is top-level only)
304                if stmt_pair.as_rule() == Rule::statement {
305                    let mut peek = stmt_pair.clone().into_inner();
306                    if let Some(first) = peek.next() {
307                        if first.as_rule() == Rule::trace_stmt {
308                            return Err(ParseError::SyntaxError(
309                                "'trace' cannot be nested; it is only allowed at the top level"
310                                    .to_string(),
311                            ));
312                        }
313                    }
314                }
315                let stmt = parse_statement(stmt_pair)?;
316                body.push(stmt);
317            }
318
319            Ok(Statement::TracePoint { pattern, body })
320        }
321        Rule::print_stmt => {
322            let print_content = inner
323                .into_inner()
324                .next()
325                .ok_or(ParseError::InvalidExpression)?;
326            let print_stmt = parse_print_content(print_content)?;
327            Ok(Statement::Print(print_stmt))
328        }
329        Rule::backtrace_stmt => Ok(Statement::Backtrace),
330        Rule::assign_stmt => {
331            // Friendly error for immutable variables (no assignment supported)
332            let mut it = inner.into_inner();
333            let name = it
334                .next()
335                .ok_or(ParseError::InvalidExpression)?
336                .as_str()
337                .to_string();
338            // consume rhs expr
339            let _ = it.next();
340            Err(ParseError::TypeError(format!(
341                "Assignment is not supported: variables are immutable. Use 'let {name} = ...' to bind once."
342            )))
343        }
344        Rule::expr_stmt => {
345            let expr = inner
346                .into_inner()
347                .next()
348                .ok_or(ParseError::InvalidExpression)?;
349            let parsed_expr = parse_expr(expr)?;
350
351            // Check expression type to ensure consistent operation types
352            if let Err(err) = infer_type(&parsed_expr) {
353                return Err(ParseError::TypeError(err));
354            }
355
356            Ok(Statement::Expr(parsed_expr))
357        }
358        Rule::var_decl_stmt => {
359            let mut inner_pairs = inner.into_inner();
360            let name = inner_pairs
361                .next()
362                .ok_or(ParseError::InvalidExpression)?
363                .as_str()
364                .to_string();
365            let expr = inner_pairs.next().ok_or(ParseError::InvalidExpression)?;
366            let parsed_expr = parse_expr(expr)?;
367
368            // Check expression type to ensure consistent operation types
369            if let Err(err) = infer_type(&parsed_expr) {
370                return Err(ParseError::TypeError(err));
371            }
372
373            if is_alias_expr(&parsed_expr) {
374                Ok(Statement::AliasDeclaration {
375                    name,
376                    target: parsed_expr,
377                })
378            } else {
379                Ok(Statement::VarDeclaration {
380                    name,
381                    value: parsed_expr,
382                })
383            }
384        }
385        Rule::if_stmt => {
386            debug!("Parsing if_stmt");
387            let mut inner_pairs = inner.into_inner();
388            let condition_pair = inner_pairs.next().ok_or(ParseError::InvalidExpression)?;
389            debug!(
390                "if_stmt condition_pair: {:?} = '{}'",
391                condition_pair.as_rule(),
392                condition_pair.as_str().trim()
393            );
394            let condition = parse_condition(condition_pair)?;
395
396            // Parse then body statements
397            let mut then_body = Vec::new();
398            let mut else_body = None;
399
400            for pair in inner_pairs {
401                match pair.as_rule() {
402                    Rule::statement => {
403                        then_body.push(parse_statement(pair)?);
404                    }
405                    Rule::else_clause => {
406                        else_body = Some(Box::new(parse_else_clause(pair)?));
407                        break;
408                    }
409                    _ => return Err(ParseError::UnexpectedToken(pair.as_rule())),
410                }
411            }
412
413            Ok(Statement::If {
414                condition,
415                then_body,
416                else_body,
417            })
418        }
419        _ => Err(ParseError::UnexpectedToken(inner.as_rule())),
420    }
421}
422
423fn parse_expr(pair: Pair<Rule>) -> Result<Expr> {
424    match pair.as_rule() {
425        Rule::expr => {
426            let inner = pair
427                .into_inner()
428                .next()
429                .ok_or(ParseError::InvalidExpression)?;
430            parse_logical_or(inner)
431        }
432        _ => Err(ParseError::UnexpectedToken(pair.as_rule())),
433    }
434}
435
436/// Determine if an expression should be treated as a DWARF alias binding.
437/// This is a purely syntactic check (parser phase) and does not consult DWARF.
438fn is_alias_expr(e: &Expr) -> bool {
439    use crate::script::ast::BinaryOp as BO;
440    use crate::script::ast::Expr as E;
441    match e {
442        E::AddressOf(_) => true,
443        // Constant offset on top of an alias-eligible expression
444        E::BinaryOp {
445            left,
446            op: BO::Add,
447            right,
448        } => {
449            let is_nonneg_lit = |x: &E| matches!(x, E::Int(v) if *v >= 0);
450            (is_alias_expr(left) && is_nonneg_lit(right))
451                || (is_alias_expr(right) && is_nonneg_lit(left))
452        }
453        _ => false,
454    }
455}
456
457fn parse_logical_or(pair: Pair<Rule>) -> Result<Expr> {
458    match pair.as_rule() {
459        Rule::logical_or => {
460            let mut pairs = pair.into_inner();
461            let first = pairs.next().ok_or(ParseError::InvalidExpression)?;
462            let mut left = parse_logical_and(first)?;
463
464            for chunk in chunks_of_two(pairs) {
465                if chunk.len() != 2 {
466                    return Err(ParseError::InvalidExpression);
467                }
468                if chunk[0].as_rule() != Rule::or_op {
469                    return Err(ParseError::UnexpectedToken(chunk[0].as_rule()));
470                }
471                let right = parse_logical_and(chunk[1].clone())?;
472                let expr = Expr::BinaryOp {
473                    left: Box::new(left),
474                    op: BinaryOp::LogicalOr,
475                    right: Box::new(right),
476                };
477                if let Err(err) = infer_type(&expr) {
478                    return Err(ParseError::TypeError(err));
479                }
480                left = expr;
481            }
482            Ok(left)
483        }
484        _ => Err(ParseError::UnexpectedToken(pair.as_rule())),
485    }
486}
487
488fn parse_logical_and(pair: Pair<Rule>) -> Result<Expr> {
489    match pair.as_rule() {
490        Rule::logical_and => {
491            let mut pairs = pair.into_inner();
492            let first = pairs.next().ok_or(ParseError::InvalidExpression)?;
493            let mut left = parse_equality(first)?;
494
495            for chunk in chunks_of_two(pairs) {
496                if chunk.len() != 2 {
497                    return Err(ParseError::InvalidExpression);
498                }
499                if chunk[0].as_rule() != Rule::and_op {
500                    return Err(ParseError::UnexpectedToken(chunk[0].as_rule()));
501                }
502                let right = parse_equality(chunk[1].clone())?;
503                let expr = Expr::BinaryOp {
504                    left: Box::new(left),
505                    op: BinaryOp::LogicalAnd,
506                    right: Box::new(right),
507                };
508                if let Err(err) = infer_type(&expr) {
509                    return Err(ParseError::TypeError(err));
510                }
511                left = expr;
512            }
513            Ok(left)
514        }
515        _ => Err(ParseError::UnexpectedToken(pair.as_rule())),
516    }
517}
518
519fn parse_equality(pair: Pair<Rule>) -> Result<Expr> {
520    match pair.as_rule() {
521        Rule::equality => {
522            let mut pairs = pair.into_inner();
523            let first = pairs.next().ok_or(ParseError::InvalidExpression)?;
524            let mut left = parse_relational(first)?;
525
526            for chunk in chunks_of_two(pairs) {
527                if chunk.len() != 2 {
528                    return Err(ParseError::InvalidExpression);
529                }
530                if chunk[0].as_rule() != Rule::eq_op {
531                    return Err(ParseError::UnexpectedToken(chunk[0].as_rule()));
532                }
533                let op = match chunk[0].as_str() {
534                    "==" => BinaryOp::Equal,
535                    "!=" => BinaryOp::NotEqual,
536                    _ => return Err(ParseError::UnexpectedToken(chunk[0].as_rule())),
537                };
538                let right = parse_relational(chunk[1].clone())?;
539                let expr = Expr::BinaryOp {
540                    left: Box::new(left),
541                    op,
542                    right: Box::new(right),
543                };
544                // Type check literals only
545                if let Err(err) = infer_type(&expr) {
546                    return Err(ParseError::TypeError(err));
547                }
548                left = expr;
549            }
550            Ok(left)
551        }
552        _ => Err(ParseError::UnexpectedToken(pair.as_rule())),
553    }
554}
555
556fn parse_relational(pair: Pair<Rule>) -> Result<Expr> {
557    match pair.as_rule() {
558        Rule::relational => {
559            let mut pairs = pair.into_inner();
560            let first = pairs.next().ok_or(ParseError::InvalidExpression)?;
561            let mut left = parse_additive(first)?;
562
563            for chunk in chunks_of_two(pairs) {
564                if chunk.len() != 2 {
565                    return Err(ParseError::InvalidExpression);
566                }
567                if chunk[0].as_rule() != Rule::rel_op {
568                    return Err(ParseError::UnexpectedToken(chunk[0].as_rule()));
569                }
570                let op = match chunk[0].as_str() {
571                    "<" => BinaryOp::LessThan,
572                    "<=" => BinaryOp::LessEqual,
573                    ">" => BinaryOp::GreaterThan,
574                    ">=" => BinaryOp::GreaterEqual,
575                    _ => return Err(ParseError::UnexpectedToken(chunk[0].as_rule())),
576                };
577                let right = parse_additive(chunk[1].clone())?;
578                let expr = Expr::BinaryOp {
579                    left: Box::new(left),
580                    op,
581                    right: Box::new(right),
582                };
583                if let Err(err) = infer_type(&expr) {
584                    return Err(ParseError::TypeError(err));
585                }
586                left = expr;
587            }
588            Ok(left)
589        }
590        _ => Err(ParseError::UnexpectedToken(pair.as_rule())),
591    }
592}
593
594fn parse_additive(pair: Pair<Rule>) -> Result<Expr> {
595    match pair.as_rule() {
596        Rule::additive => {
597            let mut pairs = pair.into_inner();
598            let first = pairs.next().ok_or(ParseError::InvalidExpression)?;
599            let mut left = parse_term(first)?;
600
601            for chunk in chunks_of_two(pairs) {
602                if chunk.len() != 2 {
603                    return Err(ParseError::InvalidExpression);
604                }
605                let op = match chunk[0].as_str() {
606                    "+" => BinaryOp::Add,
607                    "-" => BinaryOp::Subtract,
608                    _ => return Err(ParseError::UnexpectedToken(chunk[0].as_rule())),
609                };
610                let right = parse_term(chunk[1].clone())?;
611                let expr = Expr::BinaryOp {
612                    left: Box::new(left),
613                    op,
614                    right: Box::new(right),
615                };
616                if let Err(err) = infer_type(&expr) {
617                    return Err(ParseError::TypeError(err));
618                }
619                left = expr;
620            }
621            Ok(left)
622        }
623        _ => Err(ParseError::UnexpectedToken(pair.as_rule())),
624    }
625}
626
627fn parse_condition(pair: Pair<Rule>) -> Result<Expr> {
628    debug!(
629        "parse_condition: {:?} = '{}'",
630        pair.as_rule(),
631        pair.as_str().trim()
632    );
633    match pair.as_rule() {
634        Rule::condition => {
635            // Condition now accepts a full expression (equality/relational/additive/etc.)
636            let inner_expr_pair = pair
637                .into_inner()
638                .next()
639                .ok_or(ParseError::InvalidExpression)?;
640            let expr = parse_expr(inner_expr_pair)?;
641            // Basic type check of the resulting expression
642            if let Err(err) = infer_type(&expr) {
643                return Err(ParseError::TypeError(err));
644            }
645            Ok(expr)
646        }
647        _ => Err(ParseError::UnexpectedToken(pair.as_rule())),
648    }
649}
650
651fn parse_else_clause(pair: Pair<Rule>) -> Result<Statement> {
652    let inner = pair
653        .into_inner()
654        .next()
655        .ok_or(ParseError::InvalidExpression)?;
656    match inner.as_rule() {
657        Rule::if_stmt => {
658            // Directly parse if statement for else if
659            debug!("Parsing else if statement");
660            let mut inner_pairs = inner.into_inner();
661            let condition_pair = inner_pairs.next().ok_or(ParseError::InvalidExpression)?;
662            debug!(
663                "else if condition_pair: {:?} = '{}'",
664                condition_pair.as_rule(),
665                condition_pair.as_str().trim()
666            );
667            let condition = parse_condition(condition_pair)?;
668
669            // Parse then body statements
670            let mut then_body = Vec::new();
671            let mut else_body = None;
672
673            for pair in inner_pairs {
674                match pair.as_rule() {
675                    Rule::statement => {
676                        then_body.push(parse_statement(pair)?);
677                    }
678                    Rule::else_clause => {
679                        else_body = Some(Box::new(parse_else_clause(pair)?));
680                        break;
681                    }
682                    _ => return Err(ParseError::UnexpectedToken(pair.as_rule())),
683                }
684            }
685
686            Ok(Statement::If {
687                condition,
688                then_body,
689                else_body,
690            })
691        }
692        _ => {
693            // Parse else block statements
694            let mut else_body = Vec::new();
695            for node in inner.into_inner() {
696                match node.as_rule() {
697                    Rule::statement => {
698                        else_body.push(parse_statement(node)?);
699                    }
700                    // Some grammars flatten block children to concrete statements (e.g., print_stmt)
701                    Rule::print_stmt => {
702                        let content = node
703                            .into_inner()
704                            .next()
705                            .ok_or(ParseError::InvalidExpression)?;
706                        let pr = parse_print_content(content)?;
707                        else_body.push(Statement::Print(pr));
708                    }
709                    _ => return Err(ParseError::UnexpectedToken(node.as_rule())),
710                }
711            }
712            Ok(Statement::Block(else_body))
713        }
714    }
715}
716
717fn parse_term(pair: Pair<Rule>) -> Result<Expr> {
718    match pair.as_rule() {
719        Rule::term => {
720            let mut pairs = pair.into_inner();
721            let first = pairs.next().ok_or(ParseError::InvalidExpression)?;
722            let mut left = parse_unary(first)?;
723
724            for chunk in chunks_of_two(pairs) {
725                if chunk.len() != 2 {
726                    return Err(ParseError::InvalidExpression);
727                }
728
729                let op = match chunk[0].as_str() {
730                    "*" => BinaryOp::Multiply,
731                    "/" => BinaryOp::Divide,
732                    _ => return Err(ParseError::UnexpectedToken(chunk[0].as_rule())),
733                };
734
735                let right = parse_unary(chunk[1].clone())?;
736
737                // Check type consistency for binary operations
738                let expr = Expr::BinaryOp {
739                    left: Box::new(left),
740                    op,
741                    right: Box::new(right),
742                };
743
744                // Only check type consistency for literals here
745                if let Err(err) = infer_type(&expr) {
746                    return Err(ParseError::TypeError(err));
747                }
748
749                left = expr;
750            }
751
752            Ok(left)
753        }
754        _ => Err(ParseError::UnexpectedToken(pair.as_rule())),
755    }
756}
757
758fn parse_unary(pair: Pair<Rule>) -> Result<Expr> {
759    match pair.as_rule() {
760        Rule::unary => {
761            let mut inner = pair.into_inner();
762            let first = inner.next().ok_or(ParseError::InvalidExpression)?;
763            match first.as_rule() {
764                Rule::factor => parse_factor(first),
765                // '-' ~ unary
766                Rule::neg_unary => {
767                    let u = first
768                        .into_inner()
769                        .next()
770                        .ok_or(ParseError::InvalidExpression)?;
771                    let right = parse_unary(u)?;
772                    let expr = Expr::BinaryOp {
773                        left: Box::new(Expr::Int(0)),
774                        op: BinaryOp::Subtract,
775                        right: Box::new(right),
776                    };
777                    if let Err(err) = infer_type(&expr) {
778                        return Err(ParseError::TypeError(err));
779                    }
780                    Ok(expr)
781                }
782                // '!' ~ unary
783                Rule::not_unary => {
784                    let u = first
785                        .into_inner()
786                        .next()
787                        .ok_or(ParseError::InvalidExpression)?;
788                    let right = parse_unary(u)?;
789                    Ok(Expr::UnaryNot(Box::new(right)))
790                }
791                _ => Err(ParseError::UnexpectedToken(first.as_rule())),
792            }
793        }
794        _ => Err(ParseError::UnexpectedToken(pair.as_rule())),
795    }
796}
797
798fn parse_factor(pair: Pair<Rule>) -> Result<Expr> {
799    match pair.as_rule() {
800        Rule::factor => {
801            let inner = pair
802                .into_inner()
803                .next()
804                .ok_or(ParseError::InvalidExpression)?;
805            match inner.as_rule() {
806                Rule::memcmp_call => parse_builtin_call(inner),
807                Rule::strncmp_call => parse_builtin_call(inner),
808                Rule::starts_with_call => parse_builtin_call(inner),
809                Rule::hex_call => parse_builtin_call(inner),
810                Rule::chain_access => parse_chain_access(inner),
811                Rule::pointer_deref => parse_pointer_deref(inner),
812                Rule::address_of => parse_address_of(inner),
813                Rule::int => match inner.as_str().parse::<i64>() {
814                    Ok(value) => Ok(Expr::Int(value)),
815                    Err(_) => Err(ParseError::TypeError(
816                        "invalid decimal integer literal".to_string(),
817                    )),
818                },
819                Rule::hex_int => {
820                    // strip 0x and parse as hex
821                    let s = inner.as_str();
822                    match i64::from_str_radix(&s[2..], 16) {
823                        Ok(v) => Ok(Expr::Int(v)),
824                        Err(_) => Err(ParseError::TypeError(
825                            "invalid hex integer literal".to_string(),
826                        )),
827                    }
828                }
829                Rule::oct_int => {
830                    let s = inner.as_str();
831                    match i64::from_str_radix(&s[2..], 8) {
832                        Ok(v) => Ok(Expr::Int(v)),
833                        Err(_) => Err(ParseError::TypeError(
834                            "invalid octal integer literal".to_string(),
835                        )),
836                    }
837                }
838                Rule::bin_int => {
839                    let s = inner.as_str();
840                    match i64::from_str_radix(&s[2..], 2) {
841                        Ok(v) => Ok(Expr::Int(v)),
842                        Err(_) => Err(ParseError::TypeError(
843                            "invalid binary integer literal".to_string(),
844                        )),
845                    }
846                }
847                // Floats are not supported by scripts/runtime; reject early with friendly error
848                Rule::float => Err(ParseError::TypeError(
849                    "float literals are not supported".to_string(),
850                )),
851                Rule::string => {
852                    // Remove quotes at the beginning and end
853                    let raw_value = inner.as_str();
854                    let value = &raw_value[1..raw_value.len() - 1];
855                    Ok(Expr::String(value.to_string()))
856                }
857                Rule::bool => {
858                    let val = inner.as_str() == "true";
859                    Ok(Expr::Bool(val))
860                }
861                Rule::identifier => {
862                    let name = inner.as_str().to_string();
863                    Ok(Expr::Variable(name))
864                }
865                Rule::array_access => parse_array_access(inner),
866                Rule::member_access => parse_member_access(inner),
867                Rule::special_var => {
868                    let var_name = inner.as_str().to_string();
869                    Ok(Expr::SpecialVar(var_name))
870                }
871                Rule::expr => parse_expr(inner),
872                _ => Err(ParseError::UnexpectedToken(inner.as_rule())),
873            }
874        }
875        _ => Err(ParseError::UnexpectedToken(pair.as_rule())),
876    }
877}
878
879fn parse_builtin_call(pair: Pair<Rule>) -> Result<Expr> {
880    // pair is memcmp_call / strncmp_call / starts_with_call / hex_call
881    let rule = pair.as_rule();
882    let mut it = pair.into_inner();
883    // First token inside is the function name as identifier within the rule text; easier approach: use rule to select
884    match rule {
885        Rule::memcmp_call => {
886            // grammar: memcmp("(" expr "," expr ["," expr] ")")
887            let mut nodes: Vec<_> = it.collect();
888            if nodes.len() < 2 || nodes.len() > 3 {
889                return Err(ParseError::InvalidExpression);
890            }
891            let a_expr = parse_expr(nodes.remove(0))?;
892            let b_expr = parse_expr(nodes.remove(0))?;
893
894            // Disallow obviously invalid types early
895            if matches!(a_expr, Expr::Bool(_)) || matches!(b_expr, Expr::Bool(_)) {
896                return Err(ParseError::TypeError(
897                    "memcmp pointer arguments cannot be boolean; use an address or hex(...)"
898                        .to_string(),
899                ));
900            }
901            if matches!(a_expr, Expr::String(_)) || matches!(b_expr, Expr::String(_)) {
902                return Err(ParseError::TypeError(
903                    "memcmp does not accept string literals; use strncmp for strings".to_string(),
904                ));
905            }
906
907            // Helper to get hex length (bytes)
908            let hex_len = |e: &Expr| -> Option<usize> {
909                if let Expr::BuiltinCall { name, args } = e {
910                    if name == "hex" {
911                        if let Some(Expr::String(s)) = args.first() {
912                            return Some(s.len() / 2);
913                        }
914                    }
915                }
916                None
917            };
918
919            let n_expr = if let Some(n_node) = nodes.first() {
920                // With explicit len: reuse previous literal checks
921                let n_expr = parse_expr(n_node.clone())?;
922                if matches!(n_expr, Expr::Bool(_)) {
923                    return Err(ParseError::TypeError(
924                        "memcmp length must be an integer or expression, not boolean".to_string(),
925                    ));
926                }
927                let literal_len_opt: Option<isize> = match &n_expr {
928                    Expr::Int(n) => Some(*n as isize),
929                    Expr::BinaryOp {
930                        left,
931                        op: BinaryOp::Subtract,
932                        right,
933                    } => {
934                        if matches!(left.as_ref(), Expr::Int(0)) {
935                            if let Expr::Int(k) = right.as_ref() {
936                                Some(-(*k as isize))
937                            } else {
938                                None
939                            }
940                        } else {
941                            None
942                        }
943                    }
944                    _ => None,
945                };
946                if let Some(n) = literal_len_opt {
947                    if n < 0 {
948                        return Err(ParseError::TypeError(
949                            "memcmp length must be non-negative".to_string(),
950                        ));
951                    }
952                    let l = n as usize;
953                    if let Some(la) = hex_len(&a_expr) {
954                        if l > la {
955                            return Err(ParseError::TypeError(format!(
956                                "memcmp length ({l}) exceeds hex pattern size on left side ({la} bytes)"
957                            )));
958                        }
959                    }
960                    if let Some(lb) = hex_len(&b_expr) {
961                        if l > lb {
962                            return Err(ParseError::TypeError(format!(
963                                "memcmp length ({l}) exceeds hex pattern size on right side ({lb} bytes)"
964                            )));
965                        }
966                    }
967                }
968                n_expr
969            } else {
970                // No len provided: allow only when at least one side is hex(...)
971                let la = hex_len(&a_expr);
972                let lb = hex_len(&b_expr);
973                match (la, lb) {
974                    (Some(l), None) | (None, Some(l)) => Expr::Int(l as i64),
975                    (Some(la), Some(lb)) => {
976                        if la != lb {
977                            return Err(ParseError::TypeError(
978                                "memcmp hex operands have different sizes; provide explicit len"
979                                    .to_string(),
980                            ));
981                        }
982                        Expr::Int(la as i64)
983                    }
984                    _ => {
985                        return Err(ParseError::TypeError(
986                            "memcmp without len requires at least one hex(...) operand".to_string(),
987                        ))
988                    }
989                }
990            };
991
992            // Constant folding: memcmp(hex(...), hex(...), N)
993            let as_hex = |e: &Expr| -> Option<String> {
994                if let Expr::BuiltinCall { name, args } = e {
995                    if name == "hex" {
996                        if let Some(Expr::String(s)) = args.first() {
997                            return Some(s.clone());
998                        }
999                    }
1000                }
1001                None
1002            };
1003
1004            if let (Some(h1), Some(h2), Expr::Int(n)) = (as_hex(&a_expr), as_hex(&b_expr), &n_expr)
1005            {
1006                // Safe hex -> bytes (sanitized earlier to hex digits only)
1007                fn hex_to_bytes(s: &str) -> std::result::Result<Vec<u8>, ParseError> {
1008                    let mut out = Vec::with_capacity(s.len() / 2);
1009                    let bytes = s.as_bytes();
1010                    let mut i = 0;
1011                    while i + 1 < bytes.len() {
1012                        let h = bytes[i] as char;
1013                        let l = bytes[i + 1] as char;
1014                        let hv = h
1015                            .to_digit(16)
1016                            .ok_or_else(|| ParseError::TypeError("invalid hex digit".to_string()))?
1017                            as u8;
1018                        let lv = l
1019                            .to_digit(16)
1020                            .ok_or_else(|| ParseError::TypeError("invalid hex digit".to_string()))?
1021                            as u8;
1022                        out.push((hv << 4) | lv);
1023                        i += 2;
1024                    }
1025                    Ok(out)
1026                }
1027
1028                let v1 = hex_to_bytes(&h1)?;
1029                let v2 = hex_to_bytes(&h2)?;
1030                let ln = (*n).max(0) as usize;
1031                let eq = v1.iter().take(ln).eq(v2.iter().take(ln));
1032                return Ok(Expr::Bool(eq));
1033            }
1034
1035            Ok(Expr::BuiltinCall {
1036                name: "memcmp".to_string(),
1037                args: vec![a_expr, b_expr, n_expr],
1038            })
1039        }
1040        Rule::strncmp_call => {
1041            // grammar: strncmp("(" expr "," expr "," expr ")") [len must be non-negative integer literal]
1042            let arg0 = parse_expr(it.next().ok_or(ParseError::InvalidExpression)?)?;
1043            let arg1 = parse_expr(it.next().ok_or(ParseError::InvalidExpression)?)?;
1044            let n_expr_parsed = parse_expr(it.next().ok_or(ParseError::InvalidExpression)?)?;
1045            let n_val: i64 = match n_expr_parsed {
1046                Expr::Int(v) if v >= 0 => v,
1047                _ => {
1048                    return Err(ParseError::TypeError(
1049                        "strncmp third argument must be a non-negative integer literal".to_string(),
1050                    ))
1051                }
1052            };
1053            // Optional constant fold when both sides are string literals
1054            if let (Expr::String(a), Expr::String(b)) = (&arg0, &arg1) {
1055                let ln = n_val.max(0) as usize;
1056                let eq = a
1057                    .as_bytes()
1058                    .iter()
1059                    .take(ln)
1060                    .eq(b.as_bytes().iter().take(ln));
1061                return Ok(Expr::Bool(eq));
1062            }
1063            Ok(Expr::BuiltinCall {
1064                name: "strncmp".to_string(),
1065                args: vec![arg0, arg1, Expr::Int(n_val)],
1066            })
1067        }
1068        Rule::starts_with_call => {
1069            // grammar: starts_with("(" expr "," expr ")")
1070            let arg0 = parse_expr(it.next().ok_or(ParseError::InvalidExpression)?)?;
1071            let arg1 = parse_expr(it.next().ok_or(ParseError::InvalidExpression)?)?;
1072            // Constant fold when both are string literals
1073            if let (Expr::String(a), Expr::String(b)) = (&arg0, &arg1) {
1074                return Ok(Expr::Bool(a.as_bytes().starts_with(b.as_bytes())));
1075            }
1076            Ok(Expr::BuiltinCall {
1077                name: "starts_with".to_string(),
1078                args: vec![arg0, arg1],
1079            })
1080        }
1081        Rule::hex_call => {
1082            // grammar: hex("HEX...")
1083            // Validate at parse time: allow only hex digits with optional whitespace separators.
1084            let lit_node = it.next().ok_or(ParseError::InvalidExpression)?;
1085            if lit_node.as_rule() != Rule::string {
1086                return Err(ParseError::TypeError(
1087                    "hex expects a string literal".to_string(),
1088                ));
1089            }
1090            let raw = lit_node.as_str();
1091            let inner = &raw[1..raw.len() - 1];
1092            let mut sanitized = String::with_capacity(inner.len());
1093            for ch in inner.chars() {
1094                if ch.is_ascii_hexdigit() {
1095                    sanitized.push(ch);
1096                } else if ch == ' ' {
1097                    // allow spaces as separators (tabs not allowed)
1098                    continue;
1099                } else {
1100                    return Err(ParseError::TypeError(format!(
1101                        "hex literal contains non-hex character: '{ch}'"
1102                    )));
1103                }
1104            }
1105            if sanitized.len() % 2 == 1 {
1106                return Err(ParseError::TypeError(
1107                    "hex literal must contain an even number of hex digits".to_string(),
1108                ));
1109            }
1110            Ok(Expr::BuiltinCall {
1111                name: "hex".to_string(),
1112                // Store sanitized hex-only string; codegen will convert to bytes
1113                args: vec![Expr::String(sanitized)],
1114            })
1115        }
1116        _ => Err(ParseError::UnexpectedToken(rule)),
1117    }
1118}
1119
1120fn parse_trace_pattern(pair: Pair<Rule>) -> Result<TracePattern> {
1121    let inner = pair
1122        .into_inner()
1123        .next()
1124        .ok_or(ParseError::InvalidExpression)?;
1125
1126    match inner.as_rule() {
1127        Rule::module_hex_address => {
1128            let mut parts = inner.into_inner();
1129            let module = parts
1130                .next()
1131                .ok_or(ParseError::InvalidExpression)?
1132                .as_str()
1133                .to_string();
1134            let hex = parts.next().ok_or(ParseError::InvalidExpression)?.as_str();
1135            let addr = match u64::from_str_radix(&hex[2..], 16) {
1136                Ok(v) => v,
1137                Err(_) => {
1138                    return Err(ParseError::SyntaxError(format!(
1139                        "module-qualified address '{hex}' is invalid or too large for u64"
1140                    )))
1141                }
1142            };
1143            Ok(TracePattern::AddressInModule {
1144                module,
1145                address: addr,
1146            })
1147        }
1148        Rule::hex_address => {
1149            let addr_str = inner.as_str();
1150            // Remove "0x" prefix and parse as hex
1151            let addr_hex = &addr_str[2..];
1152            let addr = match u64::from_str_radix(addr_hex, 16) {
1153                Ok(v) => v,
1154                Err(_) => {
1155                    return Err(ParseError::SyntaxError(format!(
1156                        "address '{addr_str}' is invalid or too large for u64"
1157                    )))
1158                }
1159            };
1160            Ok(TracePattern::Address(addr))
1161        }
1162        Rule::wildcard_pattern => {
1163            let pattern = inner.as_str().to_string();
1164            Ok(TracePattern::Wildcard(pattern))
1165        }
1166        Rule::function_name => {
1167            let func_name = inner
1168                .into_inner()
1169                .next()
1170                .ok_or(ParseError::InvalidExpression)?
1171                .as_str()
1172                .to_string();
1173            Ok(TracePattern::FunctionName(func_name))
1174        }
1175        Rule::source_line => {
1176            let mut parts = inner.into_inner();
1177            let file_path = parts
1178                .next()
1179                .ok_or(ParseError::InvalidExpression)?
1180                .as_str()
1181                .to_string();
1182            let line_pair = parts.next().ok_or(ParseError::InvalidExpression)?;
1183            let line_number = line_pair
1184                .as_str()
1185                .parse::<u32>()
1186                .map_err(|_| ParseError::InvalidExpression)?;
1187            Ok(TracePattern::SourceLine {
1188                file_path,
1189                line_number,
1190            })
1191        }
1192        _ => Err(ParseError::UnexpectedToken(inner.as_rule())),
1193    }
1194}
1195
1196fn parse_print_content(pair: Pair<Rule>) -> Result<PrintStatement> {
1197    info!(
1198        "parse_print_content: rule={:?} text=\"{}\"",
1199        pair.as_rule(),
1200        pair.as_str().trim()
1201    );
1202    // Flatten any nested print_content nodes into a single list of children
1203    fn collect_flattened<'a>(p: Pair<'a, Rule>, out: &mut Vec<Pair<'a, Rule>>) {
1204        if p.as_rule() == Rule::print_content {
1205            for c in p.into_inner() {
1206                collect_flattened(c, out);
1207            }
1208        } else {
1209            out.push(p);
1210        }
1211    }
1212
1213    let mut flat: Vec<Pair<Rule>> = Vec::new();
1214    collect_flattened(pair, &mut flat);
1215    info!(
1216        "parse_print_content: flat_rules=[{}]",
1217        flat.iter()
1218            .map(|p| format!("{:?}", p.as_rule()))
1219            .collect::<Vec<_>>()
1220            .join(", ")
1221    );
1222    if flat.is_empty() {
1223        return Err(ParseError::InvalidExpression);
1224    }
1225
1226    // Prefer an explicit format_expr if present
1227    if let Some(fmt_idx) = flat.iter().position(|p| p.as_rule() == Rule::format_expr) {
1228        let fmt_pair = flat.remove(fmt_idx);
1229        info!("parse_print_content: branch=format_expr");
1230        let mut inner_pairs = fmt_pair.into_inner();
1231        let format_string = inner_pairs.next().ok_or(ParseError::InvalidExpression)?;
1232        let format_content = &format_string.as_str()[1..format_string.as_str().len() - 1];
1233        let mut args = Vec::new();
1234        for arg_pair in inner_pairs {
1235            args.push(parse_expr(arg_pair)?);
1236        }
1237        info!(
1238            "parse_print_content: fmt='{}' argc={}",
1239            format_content,
1240            args.len()
1241        );
1242        FormatValidator::validate_format_arguments(format_content, &args)?;
1243        return Ok(PrintStatement::Formatted {
1244            format: format_content.to_string(),
1245            args,
1246        });
1247    }
1248
1249    // Else, if first is a string and followed by one or more exprs, treat as flattened format
1250    if flat[0].as_rule() == Rule::string && flat.len() >= 2 {
1251        info!("parse_print_content: branch=flattened_string_with_args");
1252        let content_quoted = flat[0].as_str();
1253        let content = &content_quoted[1..content_quoted.len() - 1];
1254        let mut args = Vec::new();
1255        for p in flat.iter().skip(1) {
1256            if p.as_rule() != Rule::expr {
1257                return Err(ParseError::UnexpectedToken(p.as_rule()));
1258            }
1259            args.push(parse_expr(p.clone())?);
1260        }
1261        info!("parse_print_content: fmt='{}' argc={}", content, args.len());
1262        FormatValidator::validate_format_arguments(content, &args)?;
1263        return Ok(PrintStatement::Formatted {
1264            format: content.to_string(),
1265            args,
1266        });
1267    }
1268
1269    // Single string or single expr
1270    match flat[0].as_rule() {
1271        Rule::string => {
1272            info!("parse_print_content: branch=plain_string");
1273            let content = flat[0].as_str();
1274            let content = &content[1..content.len() - 1];
1275            Ok(PrintStatement::String(content.to_string()))
1276        }
1277        Rule::expr => {
1278            info!("parse_print_content: branch=complex_variable");
1279            let expr = parse_expr(flat[0].clone())?;
1280            Ok(PrintStatement::ComplexVariable(expr))
1281        }
1282        other => {
1283            info!("parse_print_content: branch=unexpected rule={:?}", other);
1284            Err(ParseError::UnexpectedToken(other))
1285        }
1286    }
1287}
1288
1289// Parse complex variable expressions (person.name, arr[0], etc.)
1290fn parse_complex_variable(pair: Pair<Rule>) -> Result<Expr> {
1291    debug!(
1292        "parse_complex_variable: {:?} = \"{}\"",
1293        pair.as_rule(),
1294        pair.as_str().trim()
1295    );
1296
1297    let inner = pair
1298        .into_inner()
1299        .next()
1300        .ok_or(ParseError::InvalidExpression)?;
1301    match inner.as_rule() {
1302        Rule::chain_access => parse_chain_access(inner),
1303        Rule::array_access => parse_array_access(inner),
1304        Rule::member_access => parse_member_access(inner),
1305        Rule::pointer_deref => parse_pointer_deref(inner),
1306        Rule::address_of => parse_address_of(inner),
1307        _ => Err(ParseError::UnexpectedToken(inner.as_rule())),
1308    }
1309}
1310
1311// Parse chain access: person.name.first
1312fn parse_chain_access(pair: Pair<Rule>) -> Result<Expr> {
1313    let mut chain: Vec<String> = Vec::new();
1314    let mut opt_index: Option<Expr> = None;
1315    for inner_pair in pair.into_inner() {
1316        match inner_pair.as_rule() {
1317            Rule::identifier => {
1318                chain.push(inner_pair.as_str().to_string());
1319            }
1320            Rule::expr => {
1321                // Support array index only at the end of the chain (Phase 1: require literal int)
1322                let parsed = parse_expr(inner_pair)?;
1323                if !matches!(parsed, Expr::Int(_)) {
1324                    return Err(ParseError::UnsupportedFeature(
1325                        "array index must be a literal integer (TODO: dynamic index)".to_string(),
1326                    ));
1327                }
1328                opt_index = Some(parsed);
1329            }
1330            _ => {}
1331        }
1332    }
1333
1334    if chain.is_empty() {
1335        return Err(ParseError::InvalidExpression);
1336    }
1337
1338    // Build base expression from the chain identifiers
1339    let mut expr = Expr::Variable(chain[0].clone());
1340    for seg in &chain[1..] {
1341        expr = Expr::MemberAccess(Box::new(expr), seg.clone());
1342    }
1343
1344    // If there's a trailing index, convert to ArrayAccess on the built base
1345    if let Some(idx) = opt_index {
1346        expr = Expr::ArrayAccess(Box::new(expr), Box::new(idx));
1347    }
1348
1349    Ok(expr)
1350}
1351
1352// Parse array access: arr[index]
1353fn parse_array_access(pair: Pair<Rule>) -> Result<Expr> {
1354    let mut inner_pairs = pair.into_inner();
1355    let array_name = inner_pairs.next().ok_or(ParseError::InvalidExpression)?;
1356    let index_expr = inner_pairs.next().ok_or(ParseError::InvalidExpression)?;
1357
1358    let _array_expr = Box::new(Expr::Variable(array_name.as_str().to_string()));
1359    let parsed_index = parse_expr(index_expr)?;
1360
1361    // Enforce: array index must be a literal integer at parse stage
1362    if !matches!(parsed_index, Expr::Int(_)) {
1363        return Err(ParseError::UnsupportedFeature(
1364            "array index must be a literal integer (TODO: support non-literal)".to_string(),
1365        ));
1366    }
1367
1368    // Build base array access expression
1369    let mut expr = Expr::ArrayAccess(
1370        Box::new(Expr::Variable(array_name.as_str().to_string())),
1371        Box::new(parsed_index),
1372    );
1373
1374    // Consume trailing .field segments if present
1375    for next in inner_pairs {
1376        // Any remaining tokens are member identifiers
1377        let m = next.as_str().to_string();
1378        expr = Expr::MemberAccess(Box::new(expr), m);
1379    }
1380
1381    Ok(expr)
1382}
1383
1384// Parse member access: person.name
1385fn parse_member_access(pair: Pair<Rule>) -> Result<Expr> {
1386    let mut parts = pair.into_inner();
1387    let base = parts
1388        .next()
1389        .ok_or(ParseError::InvalidExpression)?
1390        .as_str()
1391        .to_string();
1392
1393    // Collect all subsequent identifiers after the base
1394    let mut tail: Vec<String> = Vec::new();
1395    for p in parts {
1396        tail.push(p.as_str().to_string());
1397    }
1398
1399    // If there is only one member, keep MemberAccess for simplicity.
1400    // For multi-level chains like a.b.c, normalize to ChainAccess([a, b, c])
1401    match tail.len() {
1402        0 => Err(ParseError::InvalidExpression),
1403        1 => Ok(Expr::MemberAccess(
1404            Box::new(Expr::Variable(base)),
1405            tail.remove(0),
1406        )),
1407        _ => {
1408            let mut chain = Vec::with_capacity(1 + tail.len());
1409            chain.push(base);
1410            chain.extend(tail);
1411            Ok(Expr::ChainAccess(chain))
1412        }
1413    }
1414}
1415
1416// Parse pointer dereference: *ptr
1417fn parse_pointer_deref(pair: Pair<Rule>) -> Result<Expr> {
1418    let mut inner = pair.into_inner();
1419    let target = inner.next().ok_or(ParseError::InvalidExpression)?;
1420    let parsed = match target.as_rule() {
1421        Rule::expr => parse_expr(target)?,
1422        Rule::complex_variable => parse_complex_variable(target)?,
1423        Rule::identifier => Expr::Variable(target.as_str().to_string()),
1424        _ => return Err(ParseError::UnexpectedToken(target.as_rule())),
1425    };
1426    // Early normalization: *(&x) => x
1427    match parsed {
1428        Expr::AddressOf(inner_expr) => Ok(*inner_expr),
1429        other => Ok(Expr::PointerDeref(Box::new(other))),
1430    }
1431}
1432
1433// Parse address-of: &expr
1434fn parse_address_of(pair: Pair<Rule>) -> Result<Expr> {
1435    let mut inner = pair.into_inner();
1436    let target = inner.next().ok_or(ParseError::InvalidExpression)?;
1437    let parsed = match target.as_rule() {
1438        Rule::expr => parse_expr(target)?,
1439        Rule::complex_variable => parse_complex_variable(target)?,
1440        Rule::identifier => Expr::Variable(target.as_str().to_string()),
1441        _ => return Err(ParseError::UnexpectedToken(target.as_rule())),
1442    };
1443    // Early normalization: &(*p) => p
1444    match parsed {
1445        Expr::PointerDeref(inner_expr) => Ok(*inner_expr),
1446        other => Ok(Expr::AddressOf(Box::new(other))),
1447    }
1448}
1449
1450#[cfg(test)]
1451mod tests {
1452    use super::*;
1453
1454    #[test]
1455    fn parse_memcmp_builtin_in_if_should_succeed() {
1456        let script = r#"
1457trace foo {
1458    if memcmp(&buf[0], &buf[1], 16) { print "EQ"; }
1459}
1460"#;
1461        let r = parse(script);
1462        assert!(r.is_ok(), "parse failed: {:?}", r.err());
1463    }
1464
1465    #[test]
1466    fn parse_memcmp_with_dynamic_len() {
1467        let script = r#"
1468trace foo {
1469    let n = 10;
1470    if memcmp(&buf[0], &buf[0], n) { print "OK"; }
1471}
1472"#;
1473        let r = parse(script);
1474        assert!(r.is_ok(), "parse failed: {:?}", r.err());
1475    }
1476
1477    #[test]
1478    fn parse_if_else_with_flattened_format_and_star_len() {
1479        // else branch contains a flattened format print with {:s.*} and two args
1480        let script = r#"
1481trace src/http/ngx_http_request.c:1845 {
1482    if strncmp(host.data, "ghostscope", 10) {
1483        print "We got the request {}", *r;
1484    } else {
1485        print "The other hostname is {:s.*}", host.len, host.data;
1486    }
1487}
1488"#;
1489        let r = parse(script);
1490        assert!(r.is_ok(), "parse failed: {:?}", r.err());
1491    }
1492
1493    #[test]
1494    fn parse_memcmp_len_zero_and_negative() {
1495        let script = r#"
1496trace foo {
1497    if memcmp(&p[0], &q[0], 0) { print "Z0"; }
1498    let k = -5;
1499    if memcmp(&p[0], &q[0], k) { print "NEG"; }
1500}
1501"#;
1502        let r = parse(script);
1503        assert!(r.is_ok(), "parse failed: {:?}", r.err());
1504    }
1505
1506    #[test]
1507    fn parse_numeric_literals_hex_oct_bin_and_memcmp_usage() {
1508        let script = r#"
1509trace foo {
1510    let a = 0x10;   // 16
1511    let b = 0o755;  // 493
1512    let c = 0b1010; // 10
1513    // use in memcmp length
1514    if memcmp(&buf[0], &buf[0], 0x20) { print "H"; }
1515    if memcmp(&buf[0], &buf[0], 0o40) { print "O"; }
1516    if memcmp(&buf[0], &buf[0], 0b100000) { print "B"; }
1517    // use numeric literal as pointer address for second arg
1518    if memcmp(&buf[0], 0x7fff0000, 16) { print "P"; }
1519}
1520"#;
1521        let r = parse(script);
1522        assert!(r.is_ok(), "parse failed: {:?}", r.err());
1523    }
1524
1525    #[test]
1526    fn parse_memcmp_hex_builtin() {
1527        let script = r#"
1528trace foo {
1529    if memcmp(&buf[0], hex("504F"), 2) { print "OK"; }
1530}
1531"#;
1532        let r = parse(script);
1533        assert!(r.is_ok(), "parse failed: {:?}", r.err());
1534    }
1535
1536    #[test]
1537    fn parse_memcmp_with_numeric_pointers_and_len_bases() {
1538        let script = r#"
1539trace foo {
1540    let n = 0x10;
1541    if memcmp(0x1000, 0x2000, n) { print "NP"; }
1542    if memcmp(0o4000, 0b1000000000000, 0o20) { print "NP2"; }
1543}
1544"#;
1545        let r = parse(script);
1546        assert!(r.is_ok(), "parse failed: {:?}", r.err());
1547    }
1548
1549    #[test]
1550    fn parse_hex_with_non_hex_char_should_fail() {
1551        let script = r#"
1552trace foo {
1553    if memcmp(&buf[0], hex("G0"), 1) { print "X"; }
1554}
1555"#;
1556        let r = parse(script);
1557        match r {
1558            Ok(_) => panic!("expected parse error for non-hex char"),
1559            Err(ParseError::TypeError(msg)) => {
1560                assert!(
1561                    msg.contains("hex literal contains non-hex character"),
1562                    "unexpected msg: {msg}"
1563                );
1564            }
1565            Err(e) => panic!("unexpected error variant: {e:?}"),
1566        }
1567    }
1568
1569    #[test]
1570    fn parse_hex_with_odd_digits_should_fail() {
1571        let script = r#"
1572trace foo {
1573    if memcmp(&buf[0], hex("123"), 1) { print "X"; }
1574}
1575"#;
1576        let r = parse(script);
1577        match r {
1578            Ok(_) => panic!("expected parse error for odd-length hex"),
1579            Err(ParseError::TypeError(msg)) => {
1580                assert!(
1581                    msg.contains("even number of hex digits"),
1582                    "unexpected msg: {msg}"
1583                );
1584            }
1585            Err(e) => panic!("unexpected error variant: {e:?}"),
1586        }
1587    }
1588
1589    #[test]
1590    fn parse_hex_with_spaces_should_succeed() {
1591        let script = r#"
1592trace foo {
1593    if memcmp(&buf[0], hex("4c 49 42 5f"), 4) { print "OK"; }
1594}
1595"#;
1596        let r = parse(script);
1597        assert!(r.is_ok(), "parse failed: {:?}", r.err());
1598    }
1599
1600    #[test]
1601    fn parse_alias_declaration_address_of_and_member_access() {
1602        let script = r#"
1603trace foo {
1604    let p = &buf[0];
1605    let s = obj.field;
1606}
1607"#;
1608        let prog = parse(script).expect("parse ok");
1609        let stmt0 = prog.statements.first().expect("trace");
1610        match stmt0 {
1611            Statement::TracePoint { body, .. } => {
1612                // Only the address-of form should be alias; member access is a value binding
1613                assert!(matches!(body[0], Statement::AliasDeclaration { .. }));
1614                assert!(matches!(body[1], Statement::VarDeclaration { .. }));
1615            }
1616            other => panic!("expected TracePoint, got {other:?}"),
1617        }
1618    }
1619
1620    #[test]
1621    fn parse_alias_declaration_with_constant_offset() {
1622        let script = r#"
1623trace foo {
1624    let p = &arr[0] + 16;
1625    let q = 32 + &arr[0];
1626}
1627"#;
1628        let prog = parse(script).expect("parse ok");
1629        let stmt0 = prog.statements.first().expect("trace");
1630        match stmt0 {
1631            Statement::TracePoint { body, .. } => {
1632                assert!(matches!(body[0], Statement::AliasDeclaration { .. }));
1633                assert!(matches!(body[1], Statement::AliasDeclaration { .. }));
1634            }
1635            other => panic!("expected TracePoint, got {other:?}"),
1636        }
1637    }
1638
1639    #[test]
1640    fn parse_member_access_scalar_not_alias() {
1641        let script = r#"
1642trace foo {
1643    let level = record.level;
1644}
1645"#;
1646        let prog = parse(script).expect("parse ok");
1647        let stmt0 = prog.statements.first().expect("trace");
1648        match stmt0 {
1649            Statement::TracePoint { body, .. } => {
1650                assert!(matches!(body[0], Statement::VarDeclaration { .. }));
1651            }
1652            other => panic!("expected TracePoint, got {other:?}"),
1653        }
1654    }
1655
1656    #[test]
1657    fn parse_memcmp_rejects_string_literal() {
1658        let script = r#"
1659trace foo {
1660    if memcmp(&buf[0], "PO", 2) { print "X"; }
1661}
1662"#;
1663        let r = parse(script);
1664        assert!(
1665            matches!(r, Err(ParseError::TypeError(ref msg)) if msg.contains("memcmp does not accept string literals")),
1666            "expected type error, got: {r:?}"
1667        );
1668    }
1669
1670    #[test]
1671    fn parse_memcmp_rejects_bool_args_and_len() {
1672        // Bool as pointer argument
1673        let s1 = r#"
1674trace foo { if memcmp(true, hex("00"), 1) { print "X"; } }
1675"#;
1676        let r1 = parse(s1);
1677        assert!(r1.is_err());
1678
1679        // Bool as length
1680        let s2 = r#"
1681trace foo { if memcmp(&p[0], hex("00"), false) { print "X"; } }
1682"#;
1683        let r2 = parse(s2);
1684        assert!(
1685            matches!(r2, Err(ParseError::TypeError(ref msg)) if msg.contains("length must be")),
1686            "unexpected: {r2:?}"
1687        );
1688    }
1689
1690    #[test]
1691    fn parse_strncmp_constant_folds_on_two_literals() {
1692        // equal for first 2 bytes
1693        let s = r#"
1694trace foo {
1695    if strncmp("abc", "abd", 2) { print "T"; } else { print "F"; }
1696}
1697"#;
1698        let prog = parse(s).expect("parse ok");
1699        // Walk down to the If condition and ensure it became a Bool(true)
1700        let stmt0 = prog.statements.first().expect("one trace");
1701        match stmt0 {
1702            Statement::TracePoint { body, .. } => match &body[0] {
1703                Statement::If { condition, .. } => {
1704                    assert!(matches!(condition, Expr::Bool(true)));
1705                }
1706                other => panic!("expected If, got {other:?}"),
1707            },
1708            other => panic!("expected TracePoint, got {other:?}"),
1709        }
1710    }
1711
1712    #[test]
1713    fn parse_strncmp_requires_one_string_side_error() {
1714        let s = r#"
1715trace foo {
1716    if strncmp(1, 2, 1) { print "X"; }
1717}
1718"#;
1719        let r = parse(s);
1720        // Parser now accepts generic expr, so error will occur in compiler stage; ensure parse ok here
1721        assert!(
1722            r.is_ok(),
1723            "parse should succeed; semantic error in compiler"
1724        );
1725    }
1726
1727    #[test]
1728    fn parse_memcmp_constant_folds_on_two_hex() {
1729        let s = r#"
1730trace foo {
1731    if memcmp(hex("504f"), hex("504F"), 2) { print "EQ"; } else { print "NE"; }
1732}
1733"#;
1734        let prog = parse(s).expect("parse ok");
1735        let stmt0 = prog.statements.first().expect("one trace");
1736        match stmt0 {
1737            Statement::TracePoint { body, .. } => match &body[0] {
1738                Statement::If { condition, .. } => assert!(matches!(condition, Expr::Bool(true))),
1739                other => panic!("expected If, got {other:?}"),
1740            },
1741            other => panic!("expected TracePoint, got {other:?}"),
1742        }
1743
1744        // Mismatch without explicit len but equal sizes
1745        let s2 = r#"
1746trace foo {
1747    if memcmp(hex("504f"), hex("514f")) { print "EQ"; } else { print "NE"; }
1748}
1749"#;
1750        let prog2 = parse(s2).expect("parse ok");
1751        let stmt02 = prog2.statements.first().expect("one trace");
1752        match stmt02 {
1753            Statement::TracePoint { body, .. } => match &body[0] {
1754                Statement::If { condition, .. } => assert!(matches!(condition, Expr::Bool(false))),
1755                other => panic!("expected If, got {other:?}"),
1756            },
1757            other => panic!("expected TracePoint, got {other:?}"),
1758        }
1759    }
1760
1761    #[test]
1762    fn parse_starts_with_constant_folds_on_two_literals() {
1763        let s = r#"
1764trace foo {
1765    if starts_with("abcdef", "abc") { print "T"; } else { print "F"; }
1766}
1767"#;
1768        let prog = parse(s).expect("parse ok");
1769        let stmt0 = prog.statements.first().expect("one trace");
1770        match stmt0 {
1771            Statement::TracePoint { body, .. } => match &body[0] {
1772                Statement::If { condition, .. } => assert!(matches!(condition, Expr::Bool(true))),
1773                other => panic!("expected If, got {other:?}"),
1774            },
1775            other => panic!("expected TracePoint, got {other:?}"),
1776        }
1777
1778        let s2 = r#"
1779trace foo {
1780    if starts_with("ab", "abc") { print "T"; } else { print "F"; }
1781}
1782"#;
1783        let prog2 = parse(s2).expect("parse ok");
1784        let stmt02 = prog2.statements.first().expect("one trace");
1785        match stmt02 {
1786            Statement::TracePoint { body, .. } => match &body[0] {
1787                Statement::If { condition, .. } => assert!(matches!(condition, Expr::Bool(false))),
1788                other => panic!("expected If, got {other:?}"),
1789            },
1790            other => panic!("expected TracePoint, got {other:?}"),
1791        }
1792    }
1793
1794    #[test]
1795    fn parse_memcmp_hex_len_exceeds_left_should_fail() {
1796        // hex has 2 bytes, len=3 should error on left side
1797        let script = r#"
1798trace foo {
1799    if memcmp(hex("504f"), &buf[0], 3) { print "X"; }
1800}
1801"#;
1802        let r = parse(script);
1803        match r {
1804            Ok(_) => panic!("expected parse error for len > hex(left) size"),
1805            Err(ParseError::TypeError(msg)) => {
1806                assert!(
1807                    msg.contains("exceeds hex pattern size on left side"),
1808                    "unexpected msg: {msg}"
1809                );
1810            }
1811            Err(e) => panic!("unexpected error variant: {e:?}"),
1812        }
1813    }
1814
1815    #[test]
1816    fn parse_memcmp_hex_len_exceeds_right_should_fail() {
1817        // hex has 2 bytes, len=5 should error on right side
1818        let script = r#"
1819trace foo {
1820    if memcmp(&buf[0], hex("50 4f"), 5) { print "X"; }
1821}
1822"#;
1823        let r = parse(script);
1824        match r {
1825            Ok(_) => panic!("expected parse error for len > hex(right) size"),
1826            Err(ParseError::TypeError(msg)) => {
1827                assert!(
1828                    msg.contains("exceeds hex pattern size on right side"),
1829                    "unexpected msg: {msg}"
1830                );
1831            }
1832            Err(e) => panic!("unexpected error variant: {e:?}"),
1833        }
1834    }
1835
1836    #[test]
1837    fn parse_memcmp_hex_negative_len_should_fail() {
1838        let script = r#"
1839trace foo {
1840    if memcmp(&buf[0], hex("50 4f"), -1) { print "X"; }
1841}
1842"#;
1843        let r = parse(script);
1844        match r {
1845            Ok(_) => panic!("expected parse error for negative len"),
1846            Err(ParseError::TypeError(msg)) => {
1847                assert!(
1848                    msg.contains("length must be non-negative"),
1849                    "unexpected msg: {msg}"
1850                );
1851            }
1852            Err(e) => panic!("unexpected error variant: {e:?}"),
1853        }
1854    }
1855
1856    #[test]
1857    fn parse_memcmp_hex_len_equal_should_succeed() {
1858        // hex has 4 bytes, len=4 OK
1859        let script = r#"
1860trace foo {
1861    if memcmp(&buf[0], hex("de ad be ef"), 4) { print "OK"; }
1862}
1863"#;
1864        let r = parse(script);
1865        assert!(r.is_ok(), "parse failed: {:?}", r.err());
1866    }
1867
1868    #[test]
1869    fn parse_memcmp_hex_infers_len_left_should_succeed() {
1870        let script = r#"
1871trace foo {
1872    if memcmp(hex("50 4f"), &buf[0]) { print "OK"; }
1873}
1874"#;
1875        let r = parse(script);
1876        assert!(r.is_ok(), "parse failed: {:?}", r.err());
1877    }
1878
1879    #[test]
1880    fn parse_memcmp_hex_infers_len_right_should_succeed() {
1881        let script = r#"
1882trace foo {
1883    if memcmp(&buf[0], hex("de ad be ef")) { print "OK"; }
1884}
1885"#;
1886        let r = parse(script);
1887        assert!(r.is_ok(), "parse failed: {:?}", r.err());
1888    }
1889
1890    #[test]
1891    fn parse_assignment_is_rejected_with_friendly_message() {
1892        let script = r#"
1893trace foo {
1894    let a = 1;
1895    a = 2;
1896}
1897"#;
1898        let r = parse(script);
1899        match r {
1900            Ok(_) => panic!("expected assignment error for immutable variables"),
1901            Err(ParseError::TypeError(msg)) => {
1902                assert!(
1903                    msg.contains("Assignment is not supported"),
1904                    "unexpected msg: {msg}"
1905                );
1906            }
1907            Err(e) => panic!("unexpected error variant: {e:?}"),
1908        }
1909    }
1910
1911    #[test]
1912    fn parse_starts_with_accepts_two_exprs() {
1913        // Both sides are expr (identifiers); grammar should accept
1914        let script = r#"
1915trace foo {
1916    if starts_with(name, s) { print "OK"; }
1917}
1918"#;
1919        let r = parse(script);
1920        assert!(r.is_ok(), "parse failed: {:?}", r.err());
1921    }
1922
1923    #[test]
1924    fn parse_strncmp_accepts_two_exprs_and_len() {
1925        let script = r#"
1926trace foo {
1927    if strncmp(lhs, rhs, 3) { print "EQ"; }
1928}
1929"#;
1930        let r = parse(script);
1931        assert!(r.is_ok(), "parse failed: {:?}", r.err());
1932    }
1933
1934    #[test]
1935    fn parse_strncmp_negative_len_rejected() {
1936        // Third argument must be a non-negative integer literal
1937        let script = r#"
1938trace foo {
1939    if strncmp(lhs, rhs, -1) { print "X"; }
1940}
1941"#;
1942        let r = parse(script);
1943        assert!(r.is_err(), "expected parse error for negative length");
1944        if let Err(ParseError::TypeError(msg)) = r {
1945            assert!(msg.contains("non-negative"), "unexpected msg: {msg}");
1946        }
1947    }
1948
1949    #[test]
1950    fn parse_strncmp_nonliteral_len_rejected_with_friendly_message() {
1951        // len is variable -> reject with friendly message
1952        let script = r#"
1953trace foo {
1954    let n = 3;
1955    if strncmp(lhs, rhs, n) { print "X"; }
1956}
1957"#;
1958        let r = parse(script);
1959        match r {
1960            Err(ParseError::TypeError(msg)) => {
1961                assert!(
1962                    msg.contains("third argument must be a non-negative integer literal"),
1963                    "{msg}"
1964                );
1965            }
1966            other => panic!("expected TypeError for non-literal len, got {other:?}"),
1967        }
1968    }
1969
1970    #[test]
1971    fn parse_memcmp_missing_len_without_hex_should_fail() {
1972        let script = r#"
1973trace foo {
1974    if memcmp(&buf[0], &buf[1]) { print "OK"; }
1975}
1976"#;
1977        let r = parse(script);
1978        assert!(
1979            r.is_err(),
1980            "expected parse error for missing len without hex"
1981        );
1982    }
1983
1984    #[test]
1985    fn parse_memcmp_both_hex_mismatch_should_fail() {
1986        let script = r#"
1987trace foo {
1988    if memcmp(hex("50"), hex("504f")) { print "OK"; }
1989}
1990"#;
1991        let r = parse(script);
1992        match r {
1993            Ok(_) => panic!("expected parse error for mismatched hex sizes"),
1994            Err(ParseError::TypeError(msg)) => {
1995                assert!(msg.contains("different sizes"), "unexpected msg: {msg}");
1996            }
1997            Err(e) => panic!("unexpected error variant: {e:?}"),
1998        }
1999    }
2000
2001    #[test]
2002    fn parse_format_static_len_bases_in_prints() {
2003        // Validate that static length .N supports 0x/0o/0b in formatted prints
2004        let script = r#"
2005trace foo {
2006    print "HX={:x.0x10}", buf;
2007    print "HS={:s.0o20}", buf;
2008    print "HB={:X.0b1000}", buf;
2009}
2010"#;
2011        let r = parse(script);
2012        assert!(r.is_ok(), "parse failed: {:?}", r.err());
2013    }
2014
2015    #[test]
2016    fn parse_trace_patterns_function_line_address_wildcard() {
2017        // Function name
2018        let s1 = r#"trace main { print "OK"; }"#;
2019        assert!(parse(s1).is_ok());
2020
2021        // Source line with path and hyphen
2022        let s2 = r#"trace /tmp/test-file.c:42 { print "L"; }"#;
2023        assert!(parse(s2).is_ok());
2024
2025        // Hex address
2026        let s3 = r#"trace 0x401234 { print "A"; }"#;
2027        assert!(parse(s3).is_ok());
2028
2029        // Wildcard
2030        let s4 = r#"trace printf* { print "W"; }"#;
2031        assert!(parse(s4).is_ok());
2032
2033        // Module-qualified address
2034        let s5 = r#"trace /lib/x86_64-linux-gnu/libc.so.6:0x1234 { print "M"; }"#;
2035        assert!(parse(s5).is_ok());
2036    }
2037
2038    #[test]
2039    fn parse_module_hex_address_overflow_should_error() {
2040        // Address exceeds u64 (17 hex digits) -> parse error, not 0 fallback
2041        let s = r#"trace libfoo.so:0x10000000000000000 { print "X"; }"#;
2042        let r = parse(s);
2043        match r {
2044            Err(ParseError::SyntaxError(msg)) => assert!(msg.contains("too large for u64")),
2045            other => panic!("expected friendly SyntaxError, got {other:?}"),
2046        }
2047    }
2048
2049    #[test]
2050    fn parse_hex_address_overflow_should_error() {
2051        let s = r#"trace 0x10000000000000000 { print "X"; }"#;
2052        let r = parse(s);
2053        match r {
2054            Err(ParseError::SyntaxError(msg)) => assert!(msg.contains("too large for u64")),
2055            other => panic!("expected friendly SyntaxError, got {other:?}"),
2056        }
2057    }
2058
2059    #[test]
2060    fn parse_special_variables_basic() {
2061        // $pid/$tid/$timestamp in expressions and prints
2062        let script = r#"
2063trace foo {
2064    if $pid == 123 && $tid != 0 { print "PID_TID"; }
2065    print $timestamp;
2066    print "P:{} T:{} TS:{}", $pid, $tid, $timestamp;
2067}
2068"#;
2069        let r = parse(script);
2070        assert!(r.is_ok(), "parse failed: {:?}", r.err());
2071    }
2072
2073    #[test]
2074    fn parse_chain_and_array_access() {
2075        // Member/chain and array tail index
2076        let script = r#"
2077trace foo {
2078    print person.name.first;
2079    print arr[0];
2080    // Supported: top-level array access with trailing member
2081    print ifaces[0].mtu;
2082}
2083"#;
2084        let r = parse(script);
2085        assert!(r.is_ok(), "parse failed: {:?}", r.err());
2086    }
2087
2088    #[test]
2089    fn parse_pointer_and_address_of() {
2090        let script = r#"
2091trace foo {
2092    print *ptr;
2093    print &var;
2094    print *(arr_ptr);
2095}
2096"#;
2097        let r = parse(script);
2098        assert!(r.is_ok(), "parse failed: {:?}", r.err());
2099    }
2100
2101    #[test]
2102    fn parse_nested_trace_is_rejected() {
2103        let s = r#"
2104trace foo {
2105    trace bar { print "X"; }
2106}
2107"#;
2108        let r = parse(s);
2109        match r {
2110            Err(ParseError::SyntaxError(msg)) => assert!(msg.contains("cannot be nested")),
2111            other => panic!("expected SyntaxError for nested trace, got {other:?}"),
2112        }
2113    }
2114
2115    #[test]
2116    fn parse_float_literal_is_rejected() {
2117        let s = r#"
2118trace foo {
2119    let x = 1.23;
2120}
2121"#;
2122        let r = parse(s);
2123        match r {
2124            Err(ParseError::TypeError(msg)) => {
2125                assert!(msg.contains("float literals are not supported"))
2126            }
2127            other => panic!("expected TypeError for float literal, got {other:?}"),
2128        }
2129    }
2130
2131    #[test]
2132    fn parse_unclosed_print_string_reports_friendly_error() {
2133        let bad = r#"
2134trace foo {
2135    print "Unclosed {}, value
2136}
2137"#;
2138        let r = parse(bad);
2139        match r {
2140            Err(ParseError::SyntaxError(msg)) => assert!(msg.contains("Unclosed string literal")),
2141            other => panic!("expected SyntaxError, got {other:?}"),
2142        }
2143    }
2144
2145    #[test]
2146    fn parse_array_index_must_be_literal() {
2147        // Dynamic index on top-level array
2148        let s1 = r#"
2149trace foo {
2150    print arr[i];
2151}
2152"#;
2153        let r1 = parse(s1);
2154        assert!(r1.is_err(), "expected error for non-literal array index");
2155        if let Err(ParseError::UnsupportedFeature(msg)) = r1 {
2156            assert!(
2157                msg.contains("array index must be a literal integer"),
2158                "unexpected msg: {msg}"
2159            );
2160        }
2161
2162        // Dynamic index at chain tail
2163        let s2 = r#"
2164trace foo {
2165    print obj.arr[i];
2166}
2167"#;
2168        let r2 = parse(s2);
2169        assert!(r2.is_err(), "expected error for non-literal chain index");
2170        if let Err(ParseError::UnsupportedFeature(msg)) = r2 {
2171            assert!(msg.contains("literal integer"), "unexpected msg: {msg}");
2172        }
2173    }
2174
2175    #[test]
2176    fn parse_print_format_arg_mismatch_reports_error() {
2177        // format_expr form
2178        let s1 = r#"
2179trace foo {
2180    print "A {} {}", x;
2181}
2182"#;
2183        let r1 = parse(s1);
2184        match r1 {
2185            Err(ParseError::TypeError(msg)) => {
2186                assert!(msg.contains("expects 2 argument(s)"), "unexpected: {msg}");
2187            }
2188            other => panic!("expected TypeError from format arg mismatch, got {other:?}"),
2189        }
2190
2191        // flattened string + args form
2192        let s2 = r#"
2193trace foo {
2194    print "B {} {}", y;
2195}
2196"#;
2197        let r2 = parse(s2);
2198        match r2 {
2199            Err(ParseError::TypeError(msg)) => {
2200                assert!(msg.contains("expects 2 argument(s)"));
2201            }
2202            other => panic!("expected TypeError from format arg mismatch, got {other:?}"),
2203        }
2204    }
2205
2206    #[test]
2207    fn parse_print_invalid_format_specifier_errors() {
2208        // Missing ':' prefix inside { }
2209        let s1 = r#"
2210trace foo { print "Bad {x}", 1; }
2211"#;
2212        let r1 = parse(s1);
2213        match r1 {
2214            Err(ParseError::TypeError(msg)) => {
2215                assert!(msg.contains("Invalid format specifier"), "{msg}");
2216            }
2217            other => panic!("expected TypeError, got {other:?}"),
2218        }
2219
2220        // Unsupported conversion {:q}
2221        let s2 = r#"
2222trace foo { print "Bad {:q}", 1; }
2223"#;
2224        let r2 = parse(s2);
2225        match r2 {
2226            Err(ParseError::TypeError(msg)) => {
2227                assert!(msg.contains("Unsupported format conversion"), "{msg}");
2228            }
2229            other => panic!("expected TypeError, got {other:?}"),
2230        }
2231    }
2232
2233    #[test]
2234    fn parse_hex_with_tab_is_rejected() {
2235        let s = r#"
2236trace foo {
2237    if memcmp(&buf[0], hex("50\t4f"), 2) { print "X"; }
2238}
2239"#;
2240        let r = parse(s);
2241        match r {
2242            Err(ParseError::TypeError(msg)) => {
2243                assert!(msg.contains("non-hex character"), "{msg}");
2244            }
2245            other => panic!("expected TypeError for tab in hex literal, got {other:?}"),
2246        }
2247    }
2248
2249    #[test]
2250    fn parse_starts_with_constant_folds_on_literals() {
2251        let s = r#"
2252trace foo {
2253    if starts_with("abcdef", "abc") { print "T"; } else { print "F"; }
2254}
2255"#;
2256        let prog = parse(s).expect("parse ok");
2257        let stmt0 = prog.statements.first().expect("trace");
2258        match stmt0 {
2259            Statement::TracePoint { body, .. } => match &body[0] {
2260                Statement::If { condition, .. } => {
2261                    assert!(matches!(condition, Expr::Bool(true)));
2262                }
2263                other => panic!("expected If, got {other:?}"),
2264            },
2265            other => panic!("expected TracePoint, got {other:?}"),
2266        }
2267    }
2268
2269    #[test]
2270    fn parse_backtrace_and_bt_statements() {
2271        let s = r#"
2272trace foo {
2273    backtrace;
2274    bt;
2275}
2276"#;
2277        let r = parse(s);
2278        assert!(r.is_ok(), "parse failed: {:?}", r.err());
2279    }
2280
2281    #[test]
2282    fn parse_print_capture_len_suffix() {
2283        // {:s.name$} uses capture; does not consume extra arg
2284        let s = r#"
2285trace foo {
2286    let n = 3;
2287    print "tail={:s.n$}", p;
2288}
2289"#;
2290        let r = parse(s);
2291        assert!(r.is_ok(), "parse failed: {:?}", r.err());
2292    }
2293
2294    #[test]
2295    fn parse_unknown_keyword_inside_trace_suggests_print() {
2296        let s = r#"
2297trace foo {
2298    pront "hello";
2299}
2300"#;
2301        let r = parse(s);
2302        match r {
2303            Err(ParseError::SyntaxError(msg)) => {
2304                assert!(
2305                    msg.contains("Unknown keyword 'pront'"),
2306                    "unexpected msg: {msg}"
2307                );
2308                assert!(
2309                    msg.contains("Did you mean 'print'"),
2310                    "no suggestion in msg: {msg}"
2311                );
2312            }
2313            other => panic!("expected friendly SyntaxError for unknown keyword, got {other:?}"),
2314        }
2315    }
2316
2317    #[test]
2318    fn parse_unknown_keyword_same_line_after_brace_suggests_print() {
2319        // Unknown keyword immediately after '{' on the same line
2320        let s = r#"trace foo {pirnt \"sa\";}"#;
2321        let r = parse(s);
2322        match r {
2323            Err(ParseError::SyntaxError(msg)) => {
2324                assert!(
2325                    msg.contains("Unknown keyword 'pirnt'"),
2326                    "unexpected msg: {msg}"
2327                );
2328                assert!(
2329                    msg.contains("Did you mean 'print'"),
2330                    "no suggestion in msg: {msg}"
2331                );
2332            }
2333            other => {
2334                panic!("expected friendly SyntaxError for same-line unknown keyword, got {other:?}")
2335            }
2336        }
2337    }
2338
2339    #[test]
2340    fn parse_unknown_top_level_keyword_suggests_trace() {
2341        let s = r#"
2342traec bar {
2343    print "x";
2344}
2345"#;
2346        let r = parse(s);
2347        match r {
2348            Err(ParseError::SyntaxError(msg)) => {
2349                assert!(
2350                    msg.contains("Unknown keyword 'traec'"),
2351                    "unexpected msg: {msg}"
2352                );
2353                assert!(
2354                    msg.contains("Did you mean 'trace'"),
2355                    "no suggestion in msg: {msg}"
2356                );
2357            }
2358            other => panic!("expected friendly SyntaxError for unknown keyword, got {other:?}"),
2359        }
2360    }
2361
2362    #[test]
2363    fn parse_builtin_then_misspelled_keyword_should_point_to_misspell() {
2364        // Ensure builtin calls are not flagged; the real typo should be reported
2365        let s = r#"
2366trace foo {
2367    starts_with("a", "b"); prnit "oops";
2368}
2369"#;
2370        let r = parse(s);
2371        match r {
2372            Err(ParseError::SyntaxError(msg)) => {
2373                assert!(
2374                    msg.contains("prnit"),
2375                    "should point to misspelled 'prnit': {msg}"
2376                );
2377                assert!(
2378                    !msg.contains("starts_with"),
2379                    "should not flag builtin call: {msg}"
2380                );
2381            }
2382            other => panic!("expected friendly SyntaxError for misspelled print, got {other:?}"),
2383        }
2384    }
2385
2386    #[test]
2387    fn parse_misspelled_builtin_suggests_starts_with() {
2388        // Misspelled builtin should suggest the correct builtin name
2389        let s = r#"
2390trace foo {
2391    starst_with("a", "b");
2392}
2393"#;
2394        let r = parse(s);
2395        match r {
2396            Err(ParseError::SyntaxError(msg)) => {
2397                assert!(msg.contains("Unknown keyword 'starst_with'"), "{msg}");
2398                assert!(msg.contains("Did you mean 'starts_with'"), "{msg}");
2399            }
2400            other => panic!("expected friendly suggestion for misspelled builtin, got {other:?}"),
2401        }
2402    }
2403
2404    #[test]
2405    fn parse_misspelled_builtin_suggests_memcmp() {
2406        let s = r#"
2407trace foo {
2408    memcpm(&buf[0], &buf[1], 16);
2409}
2410"#;
2411        let r = parse(s);
2412        match r {
2413            Err(ParseError::SyntaxError(msg)) => {
2414                assert!(msg.contains("Unknown keyword 'memcpm'"), "{msg}");
2415                assert!(msg.contains("Did you mean 'memcmp'"), "{msg}");
2416            }
2417            other => panic!("expected friendly suggestion for misspelled builtin, got {other:?}"),
2418        }
2419    }
2420
2421    #[test]
2422    fn parse_if_condition_misspelled_builtin_suggests() {
2423        let s = r#"
2424trace foo {
2425    if starst_with("a", "b") { print "ok"; }
2426}
2427"#;
2428        let r = parse(s);
2429        match r {
2430            Err(ParseError::SyntaxError(msg)) => {
2431                assert!(msg.contains("Unknown keyword 'starst_with'"), "{msg}");
2432                assert!(msg.contains("Did you mean 'starts_with'"), "{msg}");
2433            }
2434            other => panic!("expected friendly suggestion inside if(), got {other:?}"),
2435        }
2436    }
2437
2438    #[test]
2439    fn parse_else_if_condition_misspelled_builtin_suggests() {
2440        let s = r#"
2441trace foo {
2442    if 1 { print "a"; } else if starst_with("a", "b") { print "b"; }
2443}
2444"#;
2445        let r = parse(s);
2446        match r {
2447            Err(ParseError::SyntaxError(msg)) => {
2448                assert!(msg.contains("Unknown keyword 'starst_with'"), "{msg}");
2449                assert!(msg.contains("Did you mean 'starts_with'"), "{msg}");
2450            }
2451            other => panic!("expected friendly suggestion inside else if(), got {other:?}"),
2452        }
2453    }
2454    #[test]
2455    fn parse_unknown_keyword_generic_expected_list() {
2456        let s = r#"
2457foobarbaz {
2458    print "x";
2459}
2460"#;
2461        let r = parse(s);
2462        match r {
2463            Err(ParseError::SyntaxError(msg)) => {
2464                assert!(
2465                    msg.contains("Unknown keyword 'foobarbaz'"),
2466                    "unexpected msg: {msg}"
2467                );
2468                assert!(
2469                    msg.contains("Expected one of"),
2470                    "missing expected list in msg: {msg}"
2471                );
2472            }
2473            other => panic!("expected friendly SyntaxError with expected list, got {other:?}"),
2474        }
2475    }
2476}