Skip to main content

openjd_expr/eval/
parse.rs

1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// Copyright by contributors to this project.
3// SPDX-License-Identifier: (Apache-2.0 OR MIT)
4
5//! Expression parsing using the ruff Python parser.
6
7use crate::error::ExpressionError;
8use crate::profile::{ExprProfile, SyntaxFeature};
9use ruff_python_ast as ast;
10use ruff_python_parser;
11use std::collections::HashMap;
12
13/// A parsed expression ready for evaluation.
14#[derive(Debug, Clone)]
15#[must_use]
16pub struct ParsedExpression {
17    pub(crate) ast: ast::Expr,
18    pub(crate) expr: String,
19    #[allow(dead_code)] // preserves original untrimmed input for potential future use
20    source: String,
21    pub(crate) keyword_renames: HashMap<String, String>,
22    pub(crate) accessed_symbols: HashSet<String>,
23    pub(crate) called_functions: HashSet<String>,
24    pub(crate) local_bindings: HashSet<String>,
25}
26
27impl ParsedExpression {
28    /// The trimmed expression string.
29    pub fn expression(&self) -> &str {
30        &self.expr
31    }
32
33    /// Symbol names accessed by this expression (e.g. `Param.Name`).
34    pub fn accessed_symbols(&self) -> &HashSet<String> {
35        &self.accessed_symbols
36    }
37
38    /// Function names called by this expression (e.g. `len`, `upper`).
39    pub fn called_functions(&self) -> &HashSet<String> {
40        &self.called_functions
41    }
42
43    /// Loop variable names bound by list comprehensions in this expression.
44    pub fn local_bindings(&self) -> &HashSet<String> {
45        &self.local_bindings
46    }
47}
48
49use std::collections::HashSet;
50
51/// Python keywords that may appear as attribute names in OpenJD expressions.
52const PYTHON_KEYWORDS: &[&str] = &[
53    "False", "None", "True", "and", "as", "assert", "async", "await", "break", "class", "continue",
54    "def", "del", "elif", "else", "except", "finally", "for", "from", "global", "if", "import",
55    "in", "is", "lambda", "nonlocal", "not", "or", "pass", "raise", "return", "try", "while",
56    "with", "yield",
57];
58
59/// Generate a same-length replacement identifier that doesn't appear in the source.
60fn make_replacement(keyword: &str, source: &str) -> String {
61    // Use a deterministic scheme: prefix with underscores to match length
62    // e.g., "if" → "xf", "def" → "xef", "else" → "xlse"
63    // If that collides, try incrementing the first char
64    let len = keyword.len();
65    for prefix in b'a'..=b'z' {
66        let mut replacement = String::with_capacity(len);
67        replacement.push(prefix as char);
68        for (_i, c) in keyword.chars().enumerate().skip(1) {
69            // Use the original chars but shifted, to maintain length
70            replacement.push(if c.is_alphabetic() { c } else { 'x' });
71        }
72        if replacement.len() == len
73            && !source.contains(&replacement)
74            && !PYTHON_KEYWORDS.contains(&replacement.as_str())
75        {
76            return replacement;
77        }
78    }
79    // Fallback: all x's
80    "x".repeat(len)
81}
82
83/// Parse a Python expression, handling contextual keywords (Python keywords
84/// used as attribute names after '.').
85///
86/// Matches the Python implementation's approach:
87/// 1. Try to parse
88/// 2. On syntax error, find keyword after '.' at error position
89/// 3. Replace with same-length identifier (preserves error positions)
90/// 4. Retry
91/// 5. After success, record renames so evaluator can map back
92impl ParsedExpression {
93    /// Parse an expression using the "latest" profile
94    /// ([`ExprProfile::latest`]): the current revision with *every* known
95    /// extension enabled.
96    ///
97    /// **This constructor is intentionally unstable across crate
98    /// versions.** The set of accepted syntax, functions, and types
99    /// grows as new extensions and revisions land. Use it for ad-hoc
100    /// parsing, prototyping, or when the source of the expression is
101    /// known to target the newest capabilities.
102    ///
103    /// For stability across crate versions, build a profile with an
104    /// explicit revision and extension set and use
105    /// [`with_profile`](Self::with_profile).
106    pub fn new(expr: &str) -> Result<ParsedExpression, ExpressionError> {
107        Self::with_profile(expr, &ExprProfile::latest())
108    }
109
110    /// Parse an expression under a caller-supplied profile.
111    ///
112    /// The profile governs which syntax features the parser accepts
113    /// (determined by its revision and enabled extensions) and — once
114    /// expression-level extensions exist — which functions and types
115    /// are in scope. An expression that parses successfully under one
116    /// profile may fail to parse under another; conversely, a template
117    /// that pinned a specific revision at decode time will continue to
118    /// parse identically across crate upgrades if it is re-parsed
119    /// against the same profile.
120    ///
121    /// # Examples
122    ///
123    /// ```
124    /// use openjd_expr::{ExprProfile, ParsedExpression};
125    ///
126    /// // Stable: this call will parse the same expression the same way
127    /// // regardless of future crate versions.
128    /// let profile = ExprProfile::current();
129    /// let parsed = ParsedExpression::with_profile("1 + 2", &profile)
130    ///     .expect("parses");
131    /// # let _ = parsed;
132    /// ```
133    pub fn with_profile(
134        expr: &str,
135        profile: &ExprProfile,
136    ) -> Result<ParsedExpression, ExpressionError> {
137        let expr_str = expr.trim();
138        if expr_str.is_empty() {
139            return Err(ExpressionError::new("Empty expression"));
140        }
141
142        // Hard cap on parser input length. A recursive-descent parser can
143        // recurse at most once per input character; the 32 MB worker-thread
144        // stack comfortably handles inputs up to
145        // [`MAX_PARSE_INPUT_LEN`], but beyond that we would risk overflow
146        // even in the dedicated thread. Inputs exceeding this size are
147        // rejected without invoking the parser at all.
148        if expr_str.len() > MAX_PARSE_INPUT_LEN {
149            return Err(ExpressionError::new(format!(
150                "Expression source length ({} bytes) exceeds maximum allowed ({} bytes)",
151                expr_str.len(),
152                MAX_PARSE_INPUT_LEN
153            )));
154        }
155
156        // Short-input fast path: a recursive-descent parser cannot recurse
157        // more times than there are input characters to consume, and the
158        // ruff parser's empirical overflow threshold on Rust's default
159        // 2 MB thread stack is well above `FAST_PATH_INPUT_LEN`. Any
160        // source at or below that length is safe to parse on the current
161        // thread regardless of shape. The structural depth walker still
162        // catches excessive AST depth that might arise from the parser's
163        // own node-building patterns.
164        if expr_str.len() <= FAST_PATH_INPUT_LEN {
165            return parse_inner(expr, expr_str, profile);
166        }
167
168        // Longer inputs could drive the parser's recursive descent past
169        // the thread's default stack (Rust cannot recover from stack
170        // overflow — it aborts the process). Run the parse + structural
171        // validation + symbol collection on a dedicated thread with an
172        // enlarged stack. The depth walker still rejects inputs whose AST
173        // exceeds `MAX_EXPRESSION_DEPTH`, so this thread allocation is
174        // purely about surviving long enough to apply that check.
175        let expr_owned = expr.to_string();
176        let profile_owned = profile.clone();
177        let handle = std::thread::Builder::new()
178            .name("openjd-expr-parse".into())
179            .stack_size(PARSER_THREAD_STACK_SIZE)
180            .spawn(move || {
181                let expr_str = expr_owned.trim();
182                parse_inner(&expr_owned, expr_str, &profile_owned)
183            })
184            .map_err(|e| ExpressionError::new(format!("Failed to spawn parser thread: {e}")))?;
185        match handle.join() {
186            Ok(result) => result,
187            Err(_) => Err(ExpressionError::new(
188                "Parser thread panicked while parsing expression",
189            )),
190        }
191    }
192
193    /// Evaluate this parsed expression against a single symbol table.
194    ///
195    /// Convenience shortcut that returns just the value. See
196    /// [`evaluate_with_metrics`](Self::evaluate_with_metrics) to also
197    /// observe resource usage.
198    pub fn evaluate(
199        &self,
200        values: &crate::symbol_table::SymbolTable,
201    ) -> Result<crate::value::ExprValue, crate::error::ExpressionError> {
202        EvalBuilder::new(self).evaluate(&[values])
203    }
204
205    /// Evaluate this parsed expression and return the value alongside
206    /// resource-usage metrics.
207    ///
208    /// Convenience shortcut equivalent to
209    /// `parsed.with_*(default).evaluate_with_metrics(&symtabs)` — call this
210    /// when you don't need any `with_*` configuration.
211    pub fn evaluate_with_metrics(
212        &self,
213        symtabs: &[&crate::symbol_table::SymbolTable],
214    ) -> Result<crate::eval::EvalResult, crate::error::ExpressionError> {
215        EvalBuilder::new(self).evaluate_with_metrics(symtabs)
216    }
217
218    /// Start a configured evaluation using the [`EvalBuilder`].
219    pub fn with_library<'a>(
220        &'a self,
221        library: &'a crate::function_library::FunctionLibrary,
222    ) -> EvalBuilder<'a> {
223        EvalBuilder::new(self).with_library(library)
224    }
225
226    /// Start a configured evaluation with a memory limit.
227    pub fn with_memory_limit(&self, limit: usize) -> EvalBuilder<'_> {
228        EvalBuilder::new(self).with_memory_limit(limit)
229    }
230
231    /// Start a configured evaluation with an operation limit.
232    pub fn with_operation_limit(&self, limit: usize) -> EvalBuilder<'_> {
233        EvalBuilder::new(self).with_operation_limit(limit)
234    }
235
236    /// Start a configured evaluation with an explicit path format.
237    pub fn with_path_format(&self, format: crate::path_mapping::PathFormat) -> EvalBuilder<'_> {
238        EvalBuilder::new(self).with_path_format(format)
239    }
240
241    /// Start a configured evaluation with a target type for coercion.
242    pub fn with_target_type<'a>(
243        &'a self,
244        target_type: &'a crate::types::ExprType,
245    ) -> EvalBuilder<'a> {
246        EvalBuilder::new(self).with_target_type(target_type)
247    }
248
249    /// Create an Evaluator pre-configured with this expression's internal
250    /// state (keyword renames, source context).
251    ///
252    /// Private: called only by [`EvalBuilder::build_evaluator`].
253    /// The public API routes through `with_*(...).evaluate(&symtabs)` on
254    /// `ParsedExpression` / `EvalBuilder`.
255    fn evaluator<'a>(
256        &'a self,
257        symtabs: &'a [&'a crate::symbol_table::SymbolTable],
258    ) -> crate::eval::Evaluator<'a> {
259        crate::eval::Evaluator::new(symtabs)
260            .with_keyword_renames(&self.keyword_renames)
261            .with_expr_source(&self.expr)
262    }
263
264    /// If this expression is a simple dotted name (e.g. `Param.Name`),
265    /// return it as a string. Returns `None` for anything more complex
266    /// (arithmetic, function calls, subscripts, etc.).
267    pub fn as_name_lookup(&self) -> Option<&str> {
268        if self.called_functions.is_empty()
269            && self.local_bindings.is_empty()
270            && self.accessed_symbols.len() == 1
271        {
272            // Verify the AST is actually just an attribute chain / bare name
273            if build_symbol_name(&self.ast, &self.keyword_renames).is_some() {
274                return self.accessed_symbols.iter().next().map(|s| s.as_str());
275            }
276        }
277        None
278    }
279}
280
281/// A builder bound to a [`ParsedExpression`] that defers choosing symbol
282/// tables until [`evaluate`](Self::evaluate) is called.
283///
284/// Obtained from `parsed.with_*(...)` on a [`ParsedExpression`]. Every
285/// `with_*` method is chainable. The terminal `evaluate(&symtabs)` and
286/// `evaluate_with_metrics(&symtabs)` methods build an internal evaluator
287/// from the captured configuration and run it.
288///
289/// ```
290/// use openjd_expr::{ExprValue, ParsedExpression, PathFormat, SymbolTable};
291///
292/// let parsed = ParsedExpression::new("Param.Frame * 2").unwrap();
293/// let mut st = SymbolTable::new();
294/// st.set("Param.Frame", 5).unwrap();
295///
296/// let value = parsed
297///     .with_path_format(PathFormat::Posix)
298///     .with_memory_limit(50_000_000)
299///     .evaluate(&[&st])
300///     .unwrap();
301/// assert_eq!(value, ExprValue::Int(10));
302/// ```
303#[must_use]
304pub struct EvalBuilder<'a> {
305    parsed: &'a ParsedExpression,
306    library: Option<&'a crate::function_library::FunctionLibrary>,
307    memory_limit: Option<usize>,
308    operation_limit: Option<usize>,
309    path_format: Option<crate::path_mapping::PathFormat>,
310    target_type: Option<&'a crate::types::ExprType>,
311}
312
313impl<'a> EvalBuilder<'a> {
314    fn new(parsed: &'a ParsedExpression) -> Self {
315        Self {
316            parsed,
317            library: None,
318            memory_limit: None,
319            operation_limit: None,
320            path_format: None,
321            target_type: None,
322        }
323    }
324
325    /// Use the given function library instead of the default one.
326    pub fn with_library(mut self, library: &'a crate::function_library::FunctionLibrary) -> Self {
327        self.library = Some(library);
328        self
329    }
330
331    /// Cap evaluation memory (bytes). Default: [`DEFAULT_MEMORY_LIMIT`](crate::DEFAULT_MEMORY_LIMIT).
332    pub fn with_memory_limit(mut self, limit: usize) -> Self {
333        self.memory_limit = Some(limit);
334        self
335    }
336
337    /// Cap evaluation operations. Default: [`DEFAULT_OPERATION_LIMIT`](crate::DEFAULT_OPERATION_LIMIT).
338    pub fn with_operation_limit(mut self, limit: usize) -> Self {
339        self.operation_limit = Some(limit);
340        self
341    }
342
343    /// Normalize path values according to `format`. Default: host-native.
344    pub fn with_path_format(mut self, format: crate::path_mapping::PathFormat) -> Self {
345        self.path_format = Some(format);
346        self
347    }
348
349    /// Coerce the final value toward `target_type`. Default: no coercion.
350    pub fn with_target_type(mut self, target_type: &'a crate::types::ExprType) -> Self {
351        self.target_type = Some(target_type);
352        self
353    }
354
355    /// Build the configured evaluator against the supplied symbol tables.
356    ///
357    /// Private helper shared by `evaluate` and `evaluate_with_metrics`.
358    fn build_evaluator(
359        &self,
360        symtabs: &'a [&'a crate::symbol_table::SymbolTable],
361    ) -> crate::eval::Evaluator<'a> {
362        let mut ev = self.parsed.evaluator(symtabs);
363        if let Some(lib) = self.library {
364            ev = ev.with_library(lib);
365        }
366        if let Some(limit) = self.memory_limit {
367            ev = ev.with_memory_limit(limit);
368        }
369        if let Some(limit) = self.operation_limit {
370            ev = ev.with_operation_limit(limit);
371        }
372        if let Some(fmt) = self.path_format {
373            ev = ev.with_path_format(fmt);
374        }
375        if let Some(tt) = self.target_type {
376            ev = ev.with_target_type(tt);
377        }
378        ev
379    }
380
381    /// Evaluate against the supplied symbol tables and return the resulting value.
382    pub fn evaluate(
383        self,
384        symtabs: &'a [&'a crate::symbol_table::SymbolTable],
385    ) -> Result<crate::value::ExprValue, crate::error::ExpressionError> {
386        let mut ev = self.build_evaluator(symtabs);
387        ev.evaluate(&self.parsed.ast)
388    }
389
390    /// Evaluate and return the value alongside resource-usage metrics.
391    pub fn evaluate_with_metrics(
392        self,
393        symtabs: &'a [&'a crate::symbol_table::SymbolTable],
394    ) -> Result<crate::eval::EvalResult, crate::error::ExpressionError> {
395        let mut ev = self.build_evaluator(symtabs);
396        let value = ev.evaluate(&self.parsed.ast)?;
397        Ok(crate::eval::EvalResult {
398            value,
399            peak_memory: ev.peak_memory(),
400            operation_count: ev.operation_count(),
401        })
402    }
403}
404
405/// Build a dotted symbol name from an attribute chain, resolving keyword renames.
406fn build_symbol_name(expr: &ast::Expr, renames: &HashMap<String, String>) -> Option<String> {
407    match expr {
408        ast::Expr::Name(n) => {
409            let name = n.id.as_str();
410            Some(
411                renames
412                    .get(name)
413                    .cloned()
414                    .unwrap_or_else(|| name.to_string()),
415            )
416        }
417        ast::Expr::Attribute(a) => {
418            let base = build_symbol_name(&a.value, renames)?;
419            let attr = a.attr.as_str();
420            let attr_resolved = renames
421                .get(attr)
422                .cloned()
423                .unwrap_or_else(|| attr.to_string());
424            Some(format!("{base}.{attr_resolved}"))
425        }
426        _ => None,
427    }
428}
429
430/// Maximum permitted AST nesting depth.
431///
432/// Applied during structural validation (parse phase) and during evaluation
433/// to prevent stack exhaustion on pathological inputs such as
434/// `((((...1...))))` or long left-associative binop chains like
435/// `1+1+...+1` that produce a deep AST even when the source is short.
436///
437/// A limit of 64 is well above any legitimate template expression: the
438/// spec already caps list nesting at 2 and list comprehensions at a
439/// single generator, and real-world expressions rarely exceed ~10
440/// levels of nesting. Exceeding this limit raises
441/// [`ExpressionErrorKind::ExpressionTooDeep`](crate::ExpressionErrorKind::ExpressionTooDeep).
442///
443/// See `specs/expr/parser.md` (Depth limit) and `specs/expr/evaluator.md`
444/// (Depth limit) for the rationale.
445pub const MAX_EXPRESSION_DEPTH: usize = 64;
446
447/// Input-length threshold below which [`ParsedExpression::new`] parses on
448/// the current thread instead of spawning a worker thread.
449///
450/// A recursive-descent parser cannot recurse more times than there are
451/// input characters to consume, and the ruff parser's empirical overflow
452/// threshold on the default 2 MB Rust thread stack is well above this
453/// value in release builds (≥ 500 frames observed). 200 characters
454/// leaves a comfortable safety margin in both release and debug profiles
455/// while keeping the vast majority of real template expressions — which
456/// are typically well under 100 characters — on the fast path.
457///
458/// This is deliberately separate from [`MAX_EXPRESSION_DEPTH`]: the depth
459/// limit is a semantic abuse ceiling on AST nesting, while this threshold
460/// is a pragmatic tuning knob for parser stack-safety on the caller's
461/// thread.
462const FAST_PATH_INPUT_LEN: usize = 200;
463
464/// Maximum permitted source length (in bytes) for a single
465/// [`ParsedExpression`]. An absolute cap independent of any AST-shape
466/// analysis: the parser's recursive descent can recurse at most once per
467/// input byte, so bounding input length bounds worst-case stack use. Set
468/// to a value that leaves generous headroom in a debug-profile build on
469/// the parser worker thread (32 MB stack).
470///
471/// Realistic template expressions are well under 1 KB; 64 KB is two
472/// orders of magnitude above that.
473pub const MAX_PARSE_INPUT_LEN: usize = 64 * 1024;
474
475/// Stack size for the parser thread used when the source string is longer
476/// than [`FAST_PATH_INPUT_LEN`]. 32 MB is large enough to handle any input
477/// up to [`MAX_PARSE_INPUT_LEN`] even in debug builds, where each parser
478/// frame can be several KB. In release builds this leaves orders of
479/// magnitude of headroom; the allocation is a one-time reservation per
480/// parse and is reclaimed when the thread ends.
481const PARSER_THREAD_STACK_SIZE: usize = 32 * 1024 * 1024;
482
483/// The actual parse loop: runs the ruff parser, handles the
484/// keyword-rename retry cycle, and wires up the resulting AST into a
485/// `ParsedExpression`. Factored out of [`ParsedExpression::new`] so it can
486/// be invoked either on the current thread (short inputs) or on a
487/// stack-enlarged worker thread (long inputs), without duplicating the
488/// loop body.
489fn parse_inner(
490    raw: &str,
491    expr_str: &str,
492    profile: &ExprProfile,
493) -> Result<ParsedExpression, ExpressionError> {
494    let mut source = expr_str.to_string();
495    let mut keyword_renames: HashMap<String, String> = HashMap::new();
496
497    // Wrap multi-line expressions in parentheses for implicit line continuation
498    let is_multiline = source.contains('\n');
499
500    loop {
501        let to_parse = if is_multiline {
502            format!("({})", source)
503        } else {
504            source.clone()
505        };
506
507        match ruff_python_parser::parse_expression(&to_parse) {
508            Ok(parsed) => {
509                let expr_node = parsed.into_expr();
510                // Structural validation (matches Python implementation)
511                validate_structure(&expr_node, expr_str, profile)?;
512                let mut accessed_symbols = HashSet::new();
513                let mut called_functions = HashSet::new();
514                let mut local_bindings = HashSet::new();
515                collect_symbols(
516                    &expr_node,
517                    &keyword_renames,
518                    &mut accessed_symbols,
519                    &mut called_functions,
520                    &mut local_bindings,
521                );
522                // Check for nested comprehension variable shadowing
523                check_comprehension_shadowing(&expr_node, &HashSet::new(), expr_str)?;
524                return Ok(ParsedExpression {
525                    ast: expr_node,
526                    source: raw.to_string(),
527                    expr: expr_str.to_string(),
528                    keyword_renames,
529                    accessed_symbols,
530                    called_functions,
531                    local_bindings,
532                });
533            }
534            Err(e) => {
535                // Try to find a keyword after '.' that caused the error
536                let mut found = false;
537                for kw in PYTHON_KEYWORDS {
538                    let pattern = format!(".{kw}");
539                    if let Some(pos) = source.find(&pattern) {
540                        // Check that the keyword is followed by a non-identifier char
541                        let after_pos = pos + 1 + kw.len();
542                        let after_char = source.chars().nth(after_pos);
543                        if after_char.is_none_or(|c| !c.is_alphanumeric() && c != '_') {
544                            let replacement = keyword_renames
545                                .iter()
546                                .find(|(_, v)| v.as_str() == *kw)
547                                .map(|(k, _)| k.clone())
548                                .unwrap_or_else(|| {
549                                    let r = make_replacement(kw, &source);
550                                    keyword_renames.insert(r.clone(), kw.to_string());
551                                    r
552                                });
553                            // Replace in source — same length, preserves positions
554                            source = format!(
555                                "{}.{}{}",
556                                &source[..pos],
557                                replacement,
558                                &source[after_pos..]
559                            );
560                            found = true;
561                            break;
562                        }
563                    }
564                }
565                if !found {
566                    let msg = format!("Syntax error: {}", e.error);
567                    let start = e.location.start().to_usize();
568                    let end = e.location.end().to_usize();
569                    // For zero-width ranges (like EOF), point at the last char
570                    let (col, end_col) = if start == end && !source.is_empty() {
571                        (0, source.len())
572                    } else {
573                        (start.min(source.len()), end.min(source.len()))
574                    };
575                    let mut err = ExpressionError::new(msg);
576                    if !source.is_empty() {
577                        err = err.with_span(&source, col, end_col.max(col + 1));
578                    }
579                    return Err(err);
580                }
581            }
582        }
583    }
584}
585
586/// Validate structural constraints on the AST that can't be caught by the parser.
587fn validate_structure(
588    node: &ast::Expr,
589    source: &str,
590    profile: &ExprProfile,
591) -> Result<(), ExpressionError> {
592    validate_structure_inner(node, source, 0, profile)
593}
594
595fn validate_structure_inner(
596    node: &ast::Expr,
597    source: &str,
598    depth: usize,
599    profile: &ExprProfile,
600) -> Result<(), ExpressionError> {
601    if depth > MAX_EXPRESSION_DEPTH {
602        return Err(
603            ExpressionError::expression_too_deep(depth, MAX_EXPRESSION_DEPTH)
604                .with_node(source, node),
605        );
606    }
607    fn err(msg: &str, source: &str, node: &ast::Expr) -> Result<(), ExpressionError> {
608        Err(ExpressionError::new(msg).with_node(source, node))
609    }
610    match node {
611        ast::Expr::ListComp(lc) => {
612            if lc.generators.len() > 1 && !profile.allows_syntax(SyntaxFeature::MultipleForClauses)
613            {
614                return Err(ExpressionError::new(
615                    "Multiple 'for' clauses in list comprehensions are not supported",
616                )
617                .with_span(source, 0, source.len()));
618            }
619            for gen in &lc.generators {
620                if matches!(&gen.target, ast::Expr::Tuple(_))
621                    && !profile.allows_syntax(SyntaxFeature::TupleUnpackingInComprehension)
622                {
623                    return Err(ExpressionError::new(
624                        "Tuple unpacking in list comprehension is not supported",
625                    )
626                    .with_node(source, &gen.target));
627                }
628                if gen.ifs.len() > 1 && !profile.allows_syntax(SyntaxFeature::MultipleIfClauses) {
629                    return Err(ExpressionError::new(
630                        "Multiple 'if' clauses in a list comprehension are not supported; combine with 'and'",
631                    ).with_span(source, 0, source.len()));
632                }
633                if let ast::Expr::Name(n) = &gen.target {
634                    let name = n.id.as_str();
635                    if !name.starts_with(|c: char| c.is_ascii_lowercase() || c == '_') {
636                        return Err(ExpressionError::new(format!(
637                            "Loop variable '{}' must start with a lowercase letter or underscore",
638                            name
639                        ))
640                        .with_node(source, &gen.target));
641                    }
642                }
643            }
644            validate_structure_inner(&lc.elt, source, depth + 1, profile)?;
645            for gen in &lc.generators {
646                validate_structure_inner(&gen.iter, source, depth + 1, profile)?;
647                for if_clause in &gen.ifs {
648                    validate_structure_inner(if_clause, source, depth + 1, profile)?;
649                }
650            }
651        }
652        ast::Expr::BinOp(b) => {
653            match b.op {
654                ast::Operator::BitAnd if !profile.allows_syntax(SyntaxFeature::BitwiseAnd) => {
655                    return err("Bitwise AND (&) is not supported", source, node)
656                }
657                ast::Operator::BitOr if !profile.allows_syntax(SyntaxFeature::BitwiseOr) => {
658                    return err("Bitwise OR (|) is not supported", source, node)
659                }
660                ast::Operator::BitXor if !profile.allows_syntax(SyntaxFeature::BitwiseXor) => {
661                    return err("Bitwise XOR (^) is not supported", source, node)
662                }
663                ast::Operator::LShift if !profile.allows_syntax(SyntaxFeature::LeftShift) => {
664                    return err("Left shift (<<) is not supported", source, node)
665                }
666                ast::Operator::RShift if !profile.allows_syntax(SyntaxFeature::RightShift) => {
667                    return err("Right shift (>>) is not supported", source, node)
668                }
669                ast::Operator::MatMult if !profile.allows_syntax(SyntaxFeature::MatMult) => {
670                    return err("Matrix multiply (@) is not supported", source, node)
671                }
672                _ => {}
673            }
674            validate_structure_inner(&b.left, source, depth + 1, profile)?;
675            validate_structure_inner(&b.right, source, depth + 1, profile)?;
676        }
677        ast::Expr::UnaryOp(u) => {
678            if matches!(u.op, ast::UnaryOp::Invert)
679                && !profile.allows_syntax(SyntaxFeature::BitwiseNot)
680            {
681                return err("Bitwise NOT (~) is not supported", source, node);
682            }
683            validate_structure_inner(&u.operand, source, depth + 1, profile)?;
684        }
685        ast::Expr::BoolOp(b) => {
686            for v in &b.values {
687                validate_structure_inner(v, source, depth + 1, profile)?;
688            }
689        }
690        ast::Expr::Compare(c) => {
691            for op in &c.ops {
692                match op {
693                    ast::CmpOp::Is if !profile.allows_syntax(SyntaxFeature::IsOperator) => {
694                        return err("'is' operator is not supported; use '=='", source, node)
695                    }
696                    ast::CmpOp::IsNot if !profile.allows_syntax(SyntaxFeature::IsNotOperator) => {
697                        return err("'is not' operator is not supported; use '!='", source, node)
698                    }
699                    _ => {}
700                }
701            }
702            validate_structure_inner(&c.left, source, depth + 1, profile)?;
703            // Chained comparisons (1<2<3<...) also grow without bound via the
704            // comparators vector; treat each comparator as +1 depth to catch
705            // `1<2<3<...<N` patterns that would otherwise be O(N) wide but
706            // shallow in terms of recursion.
707            if c.comparators.len() > MAX_EXPRESSION_DEPTH {
708                return Err(ExpressionError::expression_too_deep(
709                    c.comparators.len(),
710                    MAX_EXPRESSION_DEPTH,
711                )
712                .with_node(source, node));
713            }
714            for comp in &c.comparators {
715                validate_structure_inner(comp, source, depth + 1, profile)?;
716            }
717        }
718        ast::Expr::If(i) => {
719            validate_structure_inner(&i.test, source, depth + 1, profile)?;
720            validate_structure_inner(&i.body, source, depth + 1, profile)?;
721            validate_structure_inner(&i.orelse, source, depth + 1, profile)?;
722        }
723        ast::Expr::Call(c) => {
724            validate_structure_inner(&c.func, source, depth + 1, profile)?;
725            for arg in &c.arguments.args {
726                validate_structure_inner(arg, source, depth + 1, profile)?;
727            }
728            if !c.arguments.keywords.is_empty()
729                && !profile.allows_syntax(SyntaxFeature::KeywordArguments)
730            {
731                return err("Keyword arguments are not supported", source, node);
732            }
733        }
734        ast::Expr::List(l) => {
735            for elt in &l.elts {
736                validate_structure_inner(elt, source, depth + 1, profile)?;
737            }
738        }
739        ast::Expr::Subscript(s) => {
740            validate_structure_inner(&s.value, source, depth + 1, profile)?;
741            validate_structure_inner(&s.slice, source, depth + 1, profile)?;
742        }
743        ast::Expr::Attribute(a) => {
744            validate_structure_inner(&a.value, source, depth + 1, profile)?;
745        }
746        ast::Expr::StringLiteral(s) => {
747            for part in &s.value {
748                if matches!(
749                    part.flags.prefix(),
750                    ruff_python_ast::str_prefix::StringLiteralPrefix::Unicode
751                ) && !profile.allows_syntax(SyntaxFeature::UnicodeStringPrefix)
752                {
753                    return Err(ExpressionError::new(
754                        "Unicode string prefix u'...' is not supported. Use '...' or \"...\" instead."
755                    ).with_node(source, &ast::Expr::StringLiteral(s.clone())));
756                }
757            }
758        }
759        ast::Expr::BytesLiteral(b) if !profile.allows_syntax(SyntaxFeature::BytesLiteral) => {
760            return Err(ExpressionError::new(
761                "Byte strings (b'...') are not supported. Use '...' or \"...\" instead.",
762            )
763            .with_node(source, &ast::Expr::BytesLiteral(b.clone())));
764        }
765        // Unsupported expression types
766        ast::Expr::Named(_) if !profile.allows_syntax(SyntaxFeature::Walrus) => {
767            return err("Walrus operator (:=) is not supported", source, node)
768        }
769        ast::Expr::Lambda(_) if !profile.allows_syntax(SyntaxFeature::Lambda) => {
770            return err("Lambda expressions are not supported", source, node)
771        }
772        ast::Expr::Tuple(_) if !profile.allows_syntax(SyntaxFeature::TupleLiteral) => {
773            return err(
774                "Tuple literals are not supported; use a list instead",
775                source,
776                node,
777            )
778        }
779        ast::Expr::Dict(_) if !profile.allows_syntax(SyntaxFeature::DictLiteral) => {
780            return err("Dict literals are not supported", source, node)
781        }
782        ast::Expr::Set(_) if !profile.allows_syntax(SyntaxFeature::SetLiteral) => {
783            return err("Set literals are not supported", source, node)
784        }
785        ast::Expr::DictComp(_) if !profile.allows_syntax(SyntaxFeature::DictComprehension) => {
786            return err(
787                "Dict comprehensions are not supported; only list comprehensions are allowed",
788                source,
789                node,
790            )
791        }
792        ast::Expr::SetComp(_) if !profile.allows_syntax(SyntaxFeature::SetComprehension) => {
793            return err(
794                "Set comprehensions are not supported; only list comprehensions are allowed",
795                source,
796                node,
797            )
798        }
799        ast::Expr::Generator(_) if !profile.allows_syntax(SyntaxFeature::GeneratorExpression) => {
800            return err(
801                "Generator expressions are not supported; use a list comprehension",
802                source,
803                node,
804            )
805        }
806        ast::Expr::FString(_) if !profile.allows_syntax(SyntaxFeature::FString) => {
807            return err(
808                "f-strings are not supported; use string concatenation",
809                source,
810                node,
811            )
812        }
813        ast::Expr::EllipsisLiteral(_) if !profile.allows_syntax(SyntaxFeature::Ellipsis) => {
814            return err("Ellipsis (...) is not supported", source, node)
815        }
816        ast::Expr::Starred(_) if !profile.allows_syntax(SyntaxFeature::Starred) => {
817            return err("Star expressions are not supported", source, node)
818        }
819        ast::Expr::Await(_) if !profile.allows_syntax(SyntaxFeature::Await) => {
820            return err("Await expressions are not supported", source, node)
821        }
822        _ => {}
823    }
824    Ok(())
825}
826
827/// Check that nested comprehensions don't shadow outer comprehension variables.
828fn check_comprehension_shadowing(
829    node: &ast::Expr,
830    outer_scope: &HashSet<String>,
831    source: &str,
832) -> Result<(), ExpressionError> {
833    match node {
834        ast::Expr::ListComp(lc) => {
835            let mut comp_bindings = HashSet::new();
836            for gen in &lc.generators {
837                if let ast::Expr::Name(n) = &gen.target {
838                    let name = n.id.to_string();
839                    if outer_scope.contains(&name) {
840                        return Err(ExpressionError::new(
841                            format!("List comprehension variable '{}' shadows an outer comprehension variable", name),
842                        ).with_span(source, 0, source.len()));
843                    }
844                    comp_bindings.insert(name);
845                }
846            }
847            let new_scope: HashSet<String> = outer_scope.union(&comp_bindings).cloned().collect();
848            check_comprehension_shadowing(&lc.elt, &new_scope, source)?;
849            for gen in &lc.generators {
850                check_comprehension_shadowing(&gen.iter, outer_scope, source)?;
851                for if_clause in &gen.ifs {
852                    check_comprehension_shadowing(if_clause, &new_scope, source)?;
853                }
854            }
855        }
856        ast::Expr::BinOp(b) => {
857            check_comprehension_shadowing(&b.left, outer_scope, source)?;
858            check_comprehension_shadowing(&b.right, outer_scope, source)?;
859        }
860        ast::Expr::UnaryOp(u) => {
861            check_comprehension_shadowing(&u.operand, outer_scope, source)?;
862        }
863        ast::Expr::BoolOp(b) => {
864            for v in &b.values {
865                check_comprehension_shadowing(v, outer_scope, source)?;
866            }
867        }
868        ast::Expr::Compare(c) => {
869            check_comprehension_shadowing(&c.left, outer_scope, source)?;
870            for comp in &c.comparators {
871                check_comprehension_shadowing(comp, outer_scope, source)?;
872            }
873        }
874        ast::Expr::If(i) => {
875            check_comprehension_shadowing(&i.test, outer_scope, source)?;
876            check_comprehension_shadowing(&i.body, outer_scope, source)?;
877            check_comprehension_shadowing(&i.orelse, outer_scope, source)?;
878        }
879        ast::Expr::Call(c) => {
880            check_comprehension_shadowing(&c.func, outer_scope, source)?;
881            for arg in &c.arguments.args {
882                check_comprehension_shadowing(arg, outer_scope, source)?;
883            }
884        }
885        ast::Expr::List(l) => {
886            for elt in &l.elts {
887                check_comprehension_shadowing(elt, outer_scope, source)?;
888            }
889        }
890        ast::Expr::Subscript(s) => {
891            check_comprehension_shadowing(&s.value, outer_scope, source)?;
892            check_comprehension_shadowing(&s.slice, outer_scope, source)?;
893        }
894        _ => {}
895    }
896    Ok(())
897}
898
899/// Collect accessed symbols, called functions, and local bindings from an AST.
900/// Symbol collection is structural: names in call position are functions, not symbols.
901fn collect_symbols(
902    node: &ast::Expr,
903    renames: &HashMap<String, String>,
904    symbols: &mut HashSet<String>,
905    functions: &mut HashSet<String>,
906    locals: &mut HashSet<String>,
907) {
908    match node {
909        ast::Expr::Name(n) => {
910            let name = renames
911                .get(n.id.as_str())
912                .cloned()
913                .unwrap_or_else(|| n.id.to_string());
914            // Constants are not symbols
915            if name == "True"
916                || name == "False"
917                || name == "None"
918                || name == "true"
919                || name == "false"
920                || name == "null"
921            {
922                return;
923            }
924            if !locals.contains(&name) {
925                symbols.insert(name);
926            }
927        }
928        ast::Expr::Attribute(a) => {
929            // Collect full dotted path as a symbol
930            if let Some(full_name) = build_symbol_name(node, renames) {
931                let base = full_name.split('.').next().unwrap_or("");
932                if !locals.contains(base)
933                    && base != "True"
934                    && base != "False"
935                    && base != "None"
936                    && base != "true"
937                    && base != "false"
938                    && base != "null"
939                {
940                    symbols.insert(full_name);
941                }
942            } else {
943                // Can't build dotted name (e.g. (1+2).attr) — recurse into value
944                collect_symbols(&a.value, renames, symbols, functions, locals);
945            }
946        }
947        ast::Expr::Call(c) => {
948            // Function/method calls: the func itself is NOT a symbol reference
949            match &*c.func {
950                ast::Expr::Name(n) => {
951                    // Bare function call: name is a function, not a symbol
952                    let name = renames
953                        .get(n.id.as_str())
954                        .cloned()
955                        .unwrap_or_else(|| n.id.to_string());
956                    functions.insert(name);
957                }
958                ast::Expr::Attribute(a) => {
959                    // Method call: collect method name as function, recurse into receiver as symbol
960                    let method = a.attr.as_str();
961                    let method_resolved = renames
962                        .get(method)
963                        .cloned()
964                        .unwrap_or_else(|| method.to_string());
965                    functions.insert(method_resolved);
966                    // The receiver (a.value) IS a symbol reference
967                    collect_symbols(&a.value, renames, symbols, functions, locals);
968                }
969                _ => collect_symbols(&c.func, renames, symbols, functions, locals),
970            }
971            for arg in &c.arguments.args {
972                collect_symbols(arg, renames, symbols, functions, locals);
973            }
974        }
975        ast::Expr::BinOp(b) => {
976            collect_symbols(&b.left, renames, symbols, functions, locals);
977            collect_symbols(&b.right, renames, symbols, functions, locals);
978        }
979        ast::Expr::UnaryOp(u) => {
980            collect_symbols(&u.operand, renames, symbols, functions, locals);
981        }
982        ast::Expr::BoolOp(b) => {
983            for v in &b.values {
984                collect_symbols(v, renames, symbols, functions, locals);
985            }
986        }
987        ast::Expr::Compare(c) => {
988            collect_symbols(&c.left, renames, symbols, functions, locals);
989            for comp in &c.comparators {
990                collect_symbols(comp, renames, symbols, functions, locals);
991            }
992        }
993        ast::Expr::If(i) => {
994            collect_symbols(&i.test, renames, symbols, functions, locals);
995            collect_symbols(&i.body, renames, symbols, functions, locals);
996            collect_symbols(&i.orelse, renames, symbols, functions, locals);
997        }
998        ast::Expr::List(l) => {
999            for elt in &l.elts {
1000                collect_symbols(elt, renames, symbols, functions, locals);
1001            }
1002        }
1003        ast::Expr::Subscript(s) => {
1004            collect_symbols(&s.value, renames, symbols, functions, locals);
1005            collect_symbols(&s.slice, renames, symbols, functions, locals);
1006        }
1007        ast::Expr::Slice(s) => {
1008            if let Some(l) = &s.lower {
1009                collect_symbols(l, renames, symbols, functions, locals);
1010            }
1011            if let Some(u) = &s.upper {
1012                collect_symbols(u, renames, symbols, functions, locals);
1013            }
1014            if let Some(st) = &s.step {
1015                collect_symbols(st, renames, symbols, functions, locals);
1016            }
1017        }
1018        ast::Expr::ListComp(lc) => {
1019            // Visit iterables first (before adding loop vars)
1020            for gen in &lc.generators {
1021                collect_symbols(&gen.iter, renames, symbols, functions, locals);
1022            }
1023            // Add loop variables to local scope
1024            for gen in &lc.generators {
1025                if let ast::Expr::Name(n) = &gen.target {
1026                    locals.insert(n.id.to_string());
1027                }
1028            }
1029            // Visit element expression and filters (with loop vars in scope)
1030            collect_symbols(&lc.elt, renames, symbols, functions, locals);
1031            for gen in &lc.generators {
1032                for if_clause in &gen.ifs {
1033                    collect_symbols(if_clause, renames, symbols, functions, locals);
1034                }
1035            }
1036            // Note: we don't remove loop vars from locals — they persist for
1037            // the local_bindings set. This matches Python's collect behavior.
1038        }
1039        ast::Expr::Starred(s) => {
1040            collect_symbols(&s.value, renames, symbols, functions, locals);
1041        }
1042        // Literals — no symbols
1043        ast::Expr::NumberLiteral(_)
1044        | ast::Expr::StringLiteral(_)
1045        | ast::Expr::BooleanLiteral(_)
1046        | ast::Expr::NoneLiteral(_)
1047        | ast::Expr::EllipsisLiteral(_)
1048        | ast::Expr::BytesLiteral(_) => {}
1049        _ => {}
1050    }
1051}
1052
1053#[cfg(test)]
1054mod tests {
1055    use super::*;
1056
1057    #[test]
1058    fn parse_simple_arithmetic() {
1059        let parsed = ParsedExpression::new("1 + 2").unwrap();
1060        assert!(matches!(parsed.ast, ast::Expr::BinOp(_)));
1061    }
1062
1063    #[test]
1064    fn parse_attribute_access() {
1065        let parsed = ParsedExpression::new("Param.Name").unwrap();
1066        assert!(matches!(parsed.ast, ast::Expr::Attribute(_)));
1067    }
1068
1069    #[test]
1070    fn parse_function_call() {
1071        let parsed = ParsedExpression::new("len(Param.Files)").unwrap();
1072        assert!(matches!(parsed.ast, ast::Expr::Call(_)));
1073    }
1074
1075    #[test]
1076    fn parse_contextual_keyword_if() {
1077        let parsed = ParsedExpression::new("Param.if").unwrap();
1078        assert!(matches!(parsed.ast, ast::Expr::Attribute(_)));
1079        assert!(!parsed.keyword_renames.is_empty());
1080    }
1081
1082    #[test]
1083    fn parse_contextual_keyword_def() {
1084        let parsed = ParsedExpression::new("Param.def").unwrap();
1085        assert!(!parsed.keyword_renames.is_empty());
1086    }
1087
1088    #[test]
1089    fn parse_conditional() {
1090        let parsed = ParsedExpression::new("Param.X if Param.Flag else Param.Y").unwrap();
1091        assert!(matches!(parsed.ast, ast::Expr::If(_)));
1092    }
1093
1094    #[test]
1095    fn parse_empty_error() {
1096        assert!(ParsedExpression::new("").is_err());
1097    }
1098
1099    #[test]
1100    fn parse_syntax_error() {
1101        assert!(ParsedExpression::new("1 +").is_err());
1102    }
1103
1104    #[test]
1105    fn keyword_replacement_same_length() {
1106        let parsed = ParsedExpression::new("Param.if").unwrap();
1107        for (replacement, original) in &parsed.keyword_renames {
1108            assert_eq!(
1109                replacement.len(),
1110                original.len(),
1111                "Replacement '{}' must be same length as original '{}'",
1112                replacement,
1113                original
1114            );
1115        }
1116    }
1117}