Skip to main content

plsql_ir/
expr.rs

1//! IR for PL/SQL expressions and name references.
2//!
3//! Sibling of `stmt`: where statements carry raw
4//! `rhs_text` / `cond_text` slices, the expression IR shipped
5//! here lets downstream passes (lineage, bindgen, SAST) reason
6//! about expression structure without re-tokenising.
7//!
8//! The expression grammar is intentionally conservative — every
9//! shape recognised here is one we've found in the lab corpus
10//! and the synthetic L1 / L2 fixtures. Anything outside this set
11//! lowers to [`Expr::Raw`] with the original text, mirroring the
//! `Statement::Unrecognized` posture on the statement side.
13//!
14//! ## /oracle evidence
15//!
16//! * `DATABASE-REFERENCE.md` PL/SQL Language Reference — the
17//!   recognised reference shapes
18//!   (`<ident>`, `<schema>.<obj>`, `<schema>.<obj>.<member>`,
19//!   `<table>(<args>)` for function calls and array access) and
20//!   the operator precedence table for binary ops come from the
21//!   PL/SQL Language Reference chapter on expressions.
22//! * `LOW-LEVEL-CATALOGS.md` Data Dictionary View Families —
23//!   `ALL_IDENTIFIERS` is the PL/Scope-side view that later
24//!   passes cross-check our reference resolution against.
25
26use serde::{Deserialize, Serialize};
27
/// One PL/SQL expression node.
///
/// This is the tree produced by [`lower_expression`]. Literal variants keep
/// their source text as `String`s; composite variants (`Call`, `Binary`,
/// `Unary`) own their children, so a whole expression is a single owned tree.
///
/// NOTE(review): the enum uses serde's *internally tagged* representation
/// (`tag = "kind"`). Per the serde enum-representation documentation that
/// representation cannot carry a newtype variant whose payload is not a
/// struct/map, which would affect `BoolLit`, `IntLit`, `FloatLit`,
/// `StringLit` and `BindRef` (serialisation is expected to fail at runtime).
/// Please confirm with a JSON round-trip test before relying on it; fixing it
/// means changing either the wire format or the variant shapes.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum Expr {
    /// `NULL` literal. Also what an empty (or `;`-only) source slice lowers
    /// to.
    Null,
    /// Boolean literal — `TRUE` or `FALSE` (matched case-insensitively).
    BoolLit(bool),
    /// Integer literal preserved verbatim so downstream consumers
    /// can decide between `i32` / `i64` / `Decimal` without losing
    /// precision.
    IntLit(String),
    /// Floating-point or fixed-point literal preserved verbatim — any
    /// numeric literal containing a `.` or an exponent marker.
    FloatLit(String),
    /// String literal — body without surrounding quotes; doubled
    /// `''` already de-escaped to single `'`.
    StringLit(String),
    /// Date / timestamp / interval literal — the kind tag is the
    /// keyword (`DATE`, `TIMESTAMP`, `INTERVAL`), stored upper-case. `body`
    /// is the text between the quotes exactly as written (doubled quotes are
    /// NOT de-escaped here, unlike `StringLit`).
    DateTimeLit { keyword: String, body: String },
    /// Bind placeholder — `:1` or `:name`. The payload is the text after the
    /// leading `:`.
    BindRef(String),
    /// Substitution variable — `&name` or `&&name`. `sticky` is `true` for
    /// the `&&` form; `name` excludes the ampersand(s).
    SubstitutionRef { name: String, sticky: bool },
    /// Name reference. `parts` is the dotted path
    /// (`schema.package.member` etc.) in source order, case-folded
    /// for the lookup key but `display` preserved for diagnostics.
    Name(NameRef),
    /// `<callee>(<args>)` — function or procedure call. Also covers
    /// table / record accessors (`tab(i)`). `args` holds one lowered
    /// expression per top-level comma-separated argument.
    Call { callee: NameRef, args: Vec<Expr> },
    /// Binary operator. Operands lower to inner expressions; the
    /// operator is the canonical PL/SQL spelling
    /// (`+`, `-`, `*`, `/`, `||`, `=`, `<>`, `<`, `<=`, `>`,
    /// `>=`, `AND`, `OR`, `LIKE`, `IS`, `MEMBER OF`).
    ///
    /// NOTE(review): the precedence table in `recognise_top_level_binary`
    /// only splits on `OR`, `AND`, the comparison operators (including `!=`,
    /// which is stored as `!=` rather than `<>`), `||`, `+`, `-`, `*` and
    /// `/`. `LIKE`, `IS` and `MEMBER OF` are not produced by that table —
    /// confirm whether another producer emits them.
    Binary {
        op: String,
        lhs: Box<Expr>,
        rhs: Box<Expr>,
    },
    /// Unary operator — `NOT`, `-`, `+`.
    Unary { op: String, operand: Box<Expr> },
    /// Catch-all for shapes the recognizer can't classify. `text` carries the
    /// source that could not be lowered and `reason` says why.
    Raw {
        text: String,
        reason: UnknownExprReason,
    },
}
76
/// A (possibly dotted) identifier path such as `emp`, `pkg.member` or
/// `schema.pkg.member`.
///
/// Built by `name_ref_from`, which splits the source text on `.` and
/// ASCII-upper-cases each segment for `parts` while keeping the text as
/// written in `display`.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct NameRef {
    /// Case-folded (upper-case) path used for the lookup key in
    /// `plsql-symbols`. One entry per dot-separated segment, in source order.
    pub parts: Vec<String>,
    /// Source-form path preserved so the report renderer can show
    /// the operator's original casing in diagnostics.
    pub display: String,
}
86
/// Why an expression was left as [`Expr::Raw`] instead of being lowered into
/// a structured node. Serialised in `snake_case`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum UnknownExprReason {
    /// Expression text didn't match any recognised shape.
    UnrecognizedShape,
    /// Parens didn't balance; we don't try to sub-parse.
    UnbalancedParens,
    /// String quote didn't close.
    ///
    /// NOTE(review): no lowering path visible in this part of the file
    /// constructs this reason (an unterminated string inside a parenthesised
    /// group is reported as `UnbalancedParens`) — confirm whether another
    /// producer uses it.
    UnterminatedString,
    /// Expression nesting exceeded [`MAX_EXPR_DEPTH`]. A crafted
    /// flat binary chain (`a OR a OR … OR a`, ~8000 operands) or a
    /// deeply-nested paren / call / unary spine would otherwise drive
    /// `lower_expression` into linear-depth recursion that overflows
    /// the stack and aborts the analyzer (SIGABRT) — an unrecoverable
    /// DoS on untrusted PL/SQL input. At the cap we stop recursing and
    /// surface the truncation honestly as this typed reason (R13:
    /// never crash, never silently swallow uncertainty) rather than
    /// descending further.
    ExprDepthLimit,
}
107
/// Maximum expression-lowering recursion depth. Real well-formed
/// PL/SQL expressions nest far below this; the cap exists only so a
/// crafted flat binary chain or pathologically-nested paren / call /
/// unary spine cannot drive `lower_expression` (and the secondary
/// tree-walk consumers that re-walk the produced `Box<Expr>` chain to
/// identical depth — `collect_calls`, `collect_expr_flow`,
/// `canonicalize_expr`) into a stack-overflow / SIGABRT. Chosen high
/// enough that it never clips genuine expressions and low enough that
/// 256 frames of the walk cannot overflow even a 2 MiB tokio worker
/// stack. Mirrors the honest-degradation posture of
/// [`crate::MAX_RELOWER_DEPTH`].
///
/// The cap is enforced in `lower_expression_depth`: a call made with
/// `depth >= MAX_EXPR_DEPTH` returns an [`Expr::Raw`] tagged
/// [`UnknownExprReason::ExprDepthLimit`] instead of descending.
pub const MAX_EXPR_DEPTH: usize = 256;
120
121/// Lower a raw expression-source slice into an [`Expr`]. Errors
122/// surface as `Expr::Raw` with a typed reason — never panic.
123///
124/// The public signature is depth-agnostic; the recursion budget is
125/// threaded internally via [`lower_expression_depth`] so a crafted
126/// flat binary chain or deep paren/call/unary spine in untrusted
127/// input degrades to [`UnknownExprReason::ExprDepthLimit`] at
128/// [`MAX_EXPR_DEPTH`] instead of overflowing the stack.
129#[must_use]
130pub fn lower_expression(source: &str) -> Expr {
131    lower_expression_depth(source, 0)
132}
133
134/// Depth-bounded core of [`lower_expression`]. `depth` is the current
135/// recursion depth; every internal recursion site passes `depth + 1`.
136/// At `depth >= MAX_EXPR_DEPTH` we refuse to descend and return an
137/// honest [`UnknownExprReason::ExprDepthLimit`] `Raw` node carrying the
138/// untouched source, so the cap is surfaced as a typed degradation
139/// rather than silently swallowed.
140#[must_use]
141fn lower_expression_depth(source: &str, depth: usize) -> Expr {
142    if depth >= MAX_EXPR_DEPTH {
143        return Expr::Raw {
144            text: source.to_string(),
145            reason: UnknownExprReason::ExprDepthLimit,
146        };
147    }
148    let trimmed = source.trim().trim_end_matches(';').trim();
149    if trimmed.is_empty() {
150        return Expr::Null;
151    }
152
153    // Quick wins: literals.
154    if let Some(lit) = recognise_keyword_literal(trimmed) {
155        return lit;
156    }
157    if let Some(lit) = recognise_string_literal(trimmed) {
158        return lit;
159    }
160    if let Some(lit) = recognise_datetime_literal(trimmed) {
161        return lit;
162    }
163    if let Some(lit) = recognise_numeric_literal(trimmed) {
164        return lit;
165    }
166    if let Some(b) = recognise_bind(trimmed) {
167        return b;
168    }
169    if let Some(s) = recognise_substitution(trimmed) {
170        return s;
171    }
172
173    // A whole-expression parenthesised group `( <expr> )` is unwrapped
174    // FIRST — before the binary split — so e.g. `('SELECT ' || p_user)`
175    // strips to its inner `||` chain (taint then survives), and a
176    // parenthesised call operand `(compute(x))` is not dropped as
177    // `Raw{UnrecognizedShape}`. It is placed ahead of
178    // `recognise_top_level_binary` so the inner operator splits normally
179    // on the unwrapped text; an outer binary like `(a) + (b)` is NOT a
180    // single group (depth returns to 0 mid-text), so it falls through to
181    // the binary split unchanged.
182    if let Some(e) = recognise_paren_group(trimmed, depth) {
183        return e;
184    }
185
186    // Binary operator at the top level (lowest precedence first).
187    if let Some(e) = recognise_top_level_binary(trimmed, depth) {
188        return e;
189    }
190
191    // Function / procedure call shape `<name>(<args>)`.
192    if let Some(e) = recognise_call(trimmed, depth) {
193        return e;
194    }
195
196    // Unary `NOT` / `-` / `+` prefix.
197    if let Some(e) = recognise_unary(trimmed, depth) {
198        return e;
199    }
200
201    // Plain name reference (possibly dotted).
202    if is_dotted_name(trimmed) {
203        return Expr::Name(name_ref_from(trimmed));
204    }
205
206    Expr::Raw {
207        text: source.to_string(),
208        reason: UnknownExprReason::UnrecognizedShape,
209    }
210}
211
212fn recognise_keyword_literal(text: &str) -> Option<Expr> {
213    let upper = text.to_ascii_uppercase();
214    match upper.as_str() {
215        "NULL" => Some(Expr::Null),
216        "TRUE" => Some(Expr::BoolLit(true)),
217        "FALSE" => Some(Expr::BoolLit(false)),
218        _ => None,
219    }
220}
221
222fn recognise_string_literal(text: &str) -> Option<Expr> {
223    if !text.starts_with('\'') || !text.ends_with('\'') || text.len() < 2 {
224        return None;
225    }
226    let inner = &text[1..text.len() - 1];
227    // A naked '' separator means this looks like a string but
228    // includes a doubled-quote escape; honour it by un-doubling.
229    //
230    // Walk by `char`, not by byte: `bytes[i] as char` would
231    // reinterpret each UTF-8 byte as a Latin-1 code-point and
232    // corrupt any non-ASCII content. The quote `'` is single-byte
233    // ASCII so a char-based scan handles the escape just as cleanly.
234    let mut out = String::with_capacity(inner.len());
235    let mut chars = inner.chars().peekable();
236    while let Some(c) = chars.next() {
237        if c == '\'' {
238            if chars.peek() == Some(&'\'') {
239                // Doubled quote → one literal `'`.
240                chars.next();
241                out.push('\'');
242                continue;
243            }
244            // A solitary quote in the middle means the source
245            // literal wasn't a single quoted run.
246            return None;
247        }
248        out.push(c);
249    }
250    Some(Expr::StringLit(out))
251}
252
253fn recognise_datetime_literal(text: &str) -> Option<Expr> {
254    let upper = text.to_ascii_uppercase();
255    let keyword = if upper.starts_with("DATE") {
256        "DATE"
257    } else if upper.starts_with("TIMESTAMP") {
258        "TIMESTAMP"
259    } else if upper.starts_with("INTERVAL") {
260        "INTERVAL"
261    } else {
262        return None;
263    };
264    let after = &text[keyword.len()..];
265    let next_byte = after.bytes().next();
266    if let Some(b) = next_byte
267        && (b.is_ascii_alphanumeric() || b == b'_' || b == b'$' || b == b'#')
268    {
269        return None;
270    }
271    let trimmed = after.trim_start();
272    // Mirror `recognise_string_literal`'s `len < 2` guard. For a lone `'`,
273    // `starts_with('\'')` AND `ends_with('\'')` both inspect the same single
274    // byte and are true, so without this guard `&trimmed[1..0]` panics with
275    // "begin > end (1 > 0)". Both quotes are single-byte ASCII so the byte
276    // length comparison is correct. (oracle-ajm2.3)
277    if trimmed.len() < 2 || !trimmed.starts_with('\'') || !trimmed.ends_with('\'') {
278        return None;
279    }
280    let body = &trimmed[1..trimmed.len() - 1];
281    Some(Expr::DateTimeLit {
282        keyword: keyword.to_string(),
283        body: body.to_string(),
284    })
285}
286
287fn recognise_numeric_literal(text: &str) -> Option<Expr> {
288    let bytes = text.as_bytes();
289    if bytes.is_empty() {
290        return None;
291    }
292    let first = bytes[0];
293    if !(first.is_ascii_digit() || (first == b'.' && bytes.len() > 1)) {
294        return None;
295    }
296    let mut saw_dot = false;
297    let mut saw_e = false;
298    for &b in bytes {
299        if b.is_ascii_digit() {
300            continue;
301        }
302        if b == b'.' && !saw_dot && !saw_e {
303            saw_dot = true;
304            continue;
305        }
306        if (b == b'e' || b == b'E') && !saw_e {
307            saw_e = true;
308            saw_dot = true;
309            continue;
310        }
311        if (b == b'+' || b == b'-') && saw_e {
312            continue;
313        }
314        return None;
315    }
316    if saw_dot || saw_e {
317        Some(Expr::FloatLit(text.to_string()))
318    } else {
319        Some(Expr::IntLit(text.to_string()))
320    }
321}
322
323fn recognise_bind(text: &str) -> Option<Expr> {
324    text.strip_prefix(':')
325        .map(|rest| Expr::BindRef(rest.to_string()))
326}
327
328fn recognise_substitution(text: &str) -> Option<Expr> {
329    if let Some(rest) = text.strip_prefix("&&") {
330        return Some(Expr::SubstitutionRef {
331            name: rest.to_string(),
332            sticky: true,
333        });
334    }
335    if let Some(rest) = text.strip_prefix('&') {
336        return Some(Expr::SubstitutionRef {
337            name: rest.to_string(),
338            sticky: false,
339        });
340    }
341    None
342}
343
344/// Unwrap a whole-expression parenthesised group `( <expr> )`.
345///
346/// Returns:
347/// * `None` — `text` does not start with `(` and end with `)`, OR it
348///   does but the leading `(` does NOT match the trailing `)` at the
349///   same nesting level (e.g. `(a) + (b)`, where the depth returns to 0
350///   mid-text). The caller then proceeds to the binary split etc.
351/// * `Some(Expr::Raw{UnbalancedParens})` — the parens do not balance
352///   (depth never closes, or closes early then reopens past the end);
353///   we surface the typed reason rather than mis-stripping.
354/// * `Some(<inner lowered>)` — a clean single group; the inner text is
355///   re-lowered at `depth + 1` so the existing `MAX_EXPR_DEPTH` backstop
356///   on deeply-nested parens still holds.
357///
358/// The scan tracks paren depth and `in_string` (mirroring
359/// [`find_all_top_level_ops`]); non-ASCII bytes are skipped so we never
360/// slice off a UTF-8 char boundary and never miscount on multi-byte
361/// content. The strip slices `[1 .. len-1]`, whose boundaries are the
362/// ASCII `(` / `)` bytes — always valid char boundaries.
363fn recognise_paren_group(text: &str, depth: usize) -> Option<Expr> {
364    let bytes = text.as_bytes();
365    if bytes.first() != Some(&b'(') || bytes.last() != Some(&b')') {
366        return None;
367    }
368    let mut pdepth: i32 = 0;
369    let mut in_string = false;
370    // We strip only when paren depth first returns to 0 *exactly* at the
371    // final byte — i.e. the leading `(` pairs with the trailing `)`. If
372    // depth hits 0 before the last byte, this is not a single group
373    // (`(a) + (b)`) and we return None so the binary split can run.
374    let last = bytes.len() - 1;
375    let mut i = 0usize;
376    while i < bytes.len() {
377        let b = bytes[i];
378        if b >= 0x80 {
379            // Non-ASCII byte: cannot be a paren / quote; skip it.
380            i += 1;
381            continue;
382        }
383        if b == b'\'' {
384            in_string = !in_string;
385            i += 1;
386            continue;
387        }
388        if in_string {
389            i += 1;
390            continue;
391        }
392        if b == b'(' {
393            pdepth += 1;
394        } else if b == b')' {
395            pdepth -= 1;
396            if pdepth == 0 {
397                // The group opened at byte 0 just closed. If that is not
398                // the final byte, the leading `(` does not span the whole
399                // expression (`(a) + (b)`): not a single group.
400                if i != last {
401                    return None;
402                }
403                // Clean single group: strip the outer parens and re-lower.
404                let inner = &text[1..last];
405                return Some(lower_expression_depth(inner, depth + 1));
406            }
407            if pdepth < 0 {
408                // Closed more than opened before the end — unbalanced.
409                return Some(Expr::Raw {
410                    text: text.to_string(),
411                    reason: UnknownExprReason::UnbalancedParens,
412                });
413            }
414        }
415        i += 1;
416    }
417    // Reached the end with the group never closing (depth > 0) or an
418    // unterminated string: the parens do not balance.
419    Some(Expr::Raw {
420        text: text.to_string(),
421        reason: UnknownExprReason::UnbalancedParens,
422    })
423}
424
425/// Look for a binary operator at the **top level** (paren depth 0,
426/// quote depth 0). Honours a small precedence table — find the
427/// lowest-precedence top-level operator and split on it.
428///
429/// For the matching tier we collect **every** top-level operand run
430/// in one pass and build the `Expr::Binary` spine *iteratively*
431/// (right-fold, matching the leftmost-split recursive shape) so a
432/// flat left-associative chain (`a OR a OR … OR a`, thousands of
433/// operands) no longer drives lowering recursion linearly with
434/// operand count — it would otherwise overflow the stack and abort
435/// the analyzer on crafted untrusted input. `depth` backstops nested
436/// paren / call / unary spines and bounds the produced tree's depth so
437/// the secondary tree-walk consumers (`collect_calls`,
438/// `collect_expr_flow`, `canonicalize_expr`) that re-walk the chain to
439/// identical depth stay bounded too.
440fn recognise_top_level_binary(text: &str, depth: usize) -> Option<Expr> {
441    // Operators in *decreasing* match width so multi-char ops
442    // (`<=`, `>=`, `<>`, `||`) win over single-char ones at the
443    // same position.
444    // `=` shares the relational tier with the comparison operators so a
445    // separate, higher `&["="]` tier cannot match the `=` byte inside `<=`,
446    // `>=`, or `!=` before the 2-char form is tried. Multi-char ops stay
447    // ahead of single-char ones within the tier (the scan tries each op
448    // left-to-right at the same byte), so `a <= b` reaches the `<` byte and
449    // matches `<=` rather than splitting on the embedded `=`. (oracle-ajm2.10)
450    let precedence: &[&[&str]] = &[
451        &["OR"],
452        &["AND"],
453        &["<>", "!=", "<=", ">=", "=", "<", ">"],
454        &["||"],
455        &["+", "-"],
456        &["*", "/"],
457    ];
458
459    for tier in precedence {
460        // Collect operand segments and the operator between each
461        // adjacent pair, in source order. Empty when the tier has no
462        // top-level op here, so we fall through to the next tier.
463        let (operands, ops) = find_all_top_level_ops(text, tier);
464        if ops.is_empty() {
465            continue;
466        }
467        // Right-fold to reproduce the leftmost-split recursive shape:
468        // `a OP b OP c` → `OP(a, OP(b, c))`. Building the spine in a
469        // loop (rather than via `lower_expression` re-descending the
470        // tail) keeps a flat N-operand chain from costing N stack
471        // frames during lowering.
472        //
473        // The produced spine is right-leaning, so the i-th `Binary`
474        // node (counting the topmost as level `depth`) sits at tree
475        // depth `depth + i`, and the left operands are the *shallow*
476        // ones while the rightmost operands form the *deep* tail. To
477        // keep the WHOLE produced tree — operands included — bounded by
478        // `MAX_EXPR_DEPTH` (so the downstream tree-walk consumers that
479        // re-walk the chain stay within the stack), we materialise only
480        // the shallow prefix as real `Binary` nodes and collapse the
481        // deep tail (everything from `cap` onward, joined by its ops)
482        // into a single honest `ExprDepthLimit` `Raw`.
483        let n = operands.len();
484        // `cap` is the number of leading operands we can keep as a real
485        // spine before the next `Binary` node would reach the depth cap.
486        // The node wrapping `operands[idx]` sits at tree depth
487        // `depth + idx`, so we may keep indices with `depth + idx <
488        // MAX_EXPR_DEPTH`. There are at most `n - 1` `Binary` nodes.
489        let cap = MAX_EXPR_DEPTH.saturating_sub(depth).min(n - 1);
490        // Seed the fold with the deep tail. When `cap < n - 1` the tail
491        // `operands[cap..]` (with the ops joining them) overflows the
492        // budget and degrades honestly; otherwise the tail is just the
493        // rightmost operand lowered normally.
494        let mut acc = if cap < n - 1 {
495            Expr::Raw {
496                text: join_operand_tail(text, &operands, cap),
497                reason: UnknownExprReason::ExprDepthLimit,
498            }
499        } else {
500            // cap == n - 1: the whole chain fits. The rightmost operand
501            // sits at tree depth `depth + (n - 1)`.
502            lower_expression_depth(operands[n - 1], depth + n)
503        };
504        for idx in (0..cap).rev() {
505            // `Binary` node for `operands[idx]` sits at tree depth
506            // `depth + idx`; its lhs operand one level deeper.
507            let lhs = lower_expression_depth(operands[idx], depth + idx + 1);
508            acc = Expr::Binary {
509                op: ops[idx].to_string(),
510                lhs: Box::new(lhs),
511                rhs: Box::new(acc),
512            };
513        }
514        return Some(acc);
515    }
516    None
517}
518
/// Collect **every** top-level (paren depth 0, outside string) split
/// point for `ops`, in source order, returning the operand runs and the
/// operators between them. Used to fold a flat binary chain iteratively
/// rather than recursively (see [`recognise_top_level_binary`]) so a
/// crafted ~8000-operand chain cannot drive lowering into linear-depth
/// recursion and overflow the stack.
///
/// Matching rules:
/// * operators are compared byte-wise, ASCII case-insensitively, and tried
///   in the order given in `ops` at each position (list longer spellings
///   first);
/// * alphabetic operators (`AND`, `OR`) must stand on word boundaries;
/// * an operator with an empty operand on either side is not a split point
///   (a leading unary `-`, a trailing operator);
/// * a `+` / `-` that is really a sign is not a split point either: one
///   directly following a `*` or `/` (`a * -b`), or one inside a numeric
///   exponent (`1e-5`, `2.5E+3`).
///
/// Scanning by byte and slicing only at validated ASCII operator positions
/// keeps every returned operand slice on a UTF-8 char boundary even when the
/// text contains multi-byte characters.
///
/// Returns `(operands, ops)` with `operands.len() == ops.len() + 1` when
/// at least one operator matched, and `(vec![], vec![])` otherwise. Operand
/// slices are trimmed sub-slices of `text`; operator entries are the
/// spellings from `ops`.
fn find_all_top_level_ops<'a, 'b>(
    text: &'a str,
    ops: &'b [&'b str],
) -> (Vec<&'a str>, Vec<&'b str>) {
    let bytes = text.as_bytes();
    let mut depth: i32 = 0;
    let mut in_string = false;
    let mut i = 0;
    // Start of the current operand run (byte offset just past the
    // previous operator, or 0 for the first run).
    let mut seg_start = 0usize;
    let mut operands: Vec<&'a str> = Vec::new();
    let mut found_ops: Vec<&'b str> = Vec::new();

    while i < bytes.len() {
        let b = bytes[i];
        // Bytes that can never start an operator match at this position:
        // non-ASCII bytes (all operators are ASCII), quotes and string
        // content, parens, and anything nested inside parens.
        let skip = match b {
            _ if b >= 0x80 => true,
            b'\'' => {
                in_string = !in_string;
                true
            }
            _ if in_string => true,
            b'(' => {
                depth += 1;
                true
            }
            b')' => {
                depth -= 1;
                true
            }
            _ => depth != 0,
        };
        if skip {
            i += 1;
            continue;
        }

        let mut matched = false;
        for op in ops {
            let op_bytes = op.as_bytes();
            if i + op_bytes.len() > bytes.len() {
                continue;
            }
            let candidate_bytes = &bytes[i..i + op_bytes.len()];
            let matches = candidate_bytes
                .iter()
                .zip(op_bytes.iter())
                .all(|(&cb, &ob)| cb.to_ascii_uppercase() == ob);
            if !matches {
                continue;
            }
            // Word-boundary check for alpha operators (`AND`, `OR`).
            if op.chars().all(|c| c.is_ascii_alphabetic()) {
                let prev_ok = i == 0 || {
                    let p = bytes[i - 1];
                    !(p.is_ascii_alphanumeric() || p == b'_')
                };
                let next_ok = i + op_bytes.len() == bytes.len() || {
                    let n = bytes[i + op_bytes.len()];
                    !(n.is_ascii_alphanumeric() || n == b'_')
                };
                if !(prev_ok && next_ok) {
                    continue;
                }
            }
            // Both neighbouring operands must be non-empty after trimming,
            // so a leading unary `-` is not mistaken for a binary split
            // point and a trailing operator never yields an empty operand.
            let lhs = text[seg_start..i].trim();
            let rhs = text[i + op_bytes.len()..].trim();
            if lhs.is_empty() || rhs.is_empty() {
                continue;
            }
            // A `+` / `-` that is a sign rather than a binary operator must
            // not split: `a * -b` would otherwise become `(a *) - (b)` and
            // `x * 1e-5` would become `(x * 1e) - (5)`.
            if matches!(*op, "+" | "-")
                && (ends_with_multiplicative_op(lhs) || is_exponent_sign(bytes, i))
            {
                continue;
            }
            operands.push(lhs);
            found_ops.push(*op);
            // Resume scanning past this operator; the next run starts here.
            i += op_bytes.len();
            seg_start = i;
            matched = true;
            break;
        }
        if !matched {
            i += 1;
        }
    }

    if found_ops.is_empty() {
        return (Vec::new(), Vec::new());
    }
    // The final operand run from the last operator to the end.
    operands.push(text[seg_start..].trim());
    (operands, found_ops)
}

/// True when the (trimmed) left operand ends in `*` or `/`, i.e. a following
/// `+` / `-` is the sign of the right-hand factor, not a binary operator.
fn ends_with_multiplicative_op(lhs: &str) -> bool {
    lhs.ends_with(|c: char| c == '*' || c == '/')
}

/// True when the `+` / `-` at `sign_at` is the sign of a numeric exponent:
/// it is directly preceded by `e`/`E` that follows a digits-and-dots
/// mantissa (not glued to an identifier), and directly followed by a digit.
fn is_exponent_sign(bytes: &[u8], sign_at: usize) -> bool {
    if sign_at == 0 || !matches!(bytes[sign_at - 1], b'e' | b'E') {
        return false;
    }
    if !matches!(bytes.get(sign_at + 1), Some(b) if b.is_ascii_digit()) {
        return false;
    }
    let mantissa_end = sign_at - 1;
    // Walk back over the run of digits / dots that precedes the `e`.
    let mantissa_start = bytes[..mantissa_end]
        .iter()
        .rposition(|b| !(b.is_ascii_digit() || *b == b'.'))
        .map_or(0, |p| p + 1);
    if !bytes[mantissa_start..mantissa_end]
        .iter()
        .any(|b| b.is_ascii_digit())
    {
        // No digits before the `e`: this is an identifier ending in `e`.
        return false;
    }
    // `x2e-5` is the identifier `x2e` minus 5, not a number.
    mantissa_start == 0 || {
        let p = bytes[mantissa_start - 1];
        !(p.is_ascii_alphanumeric() || matches!(p, b'_' | b'$' | b'#') || p >= 0x80)
    }
}
638
/// Recover the source text covered by `operands[cap..]`, operators included,
/// for use as the text of an `ExprDepthLimit` `Raw` node.
///
/// Every operand is expected to be a sub-slice of `text`, so the tail is the
/// byte range from the first byte of `operands[cap]` to the last byte of the
/// final operand. The range is located by comparing slice addresses against
/// the start of `text`, which reproduces the original spacing and operators
/// exactly instead of re-joining with a guessed separator.
///
/// If the computed range is not a valid slice of `text` (which should not
/// happen for genuine sub-slices) the whole of `text` is returned instead.
fn join_operand_tail(text: &str, operands: &[&str], cap: usize) -> String {
    debug_assert!(cap < operands.len());

    let origin = text.as_ptr() as usize;
    // Byte offset of `piece` inside `text`; clamps to 0 for a slice that
    // starts before `text`.
    let offset_in_text = |piece: &str| (piece.as_ptr() as usize).saturating_sub(origin);

    let tail_head = operands[cap];
    let tail_last = operands[operands.len() - 1];
    let from = offset_in_text(tail_head);
    let to = (offset_in_text(tail_last) + tail_last.len()).min(text.len());

    // `get` yields `None` for an inverted range or one that is not on char
    // boundaries — the defensive fallback then keeps the whole text.
    text.get(from..to).unwrap_or(text).to_string()
}
664
665fn recognise_call(text: &str, depth: usize) -> Option<Expr> {
666    let open = text.find('(')?;
667    if !text.ends_with(')') {
668        return None;
669    }
670    let name_part = text[..open].trim();
671    if !is_dotted_name(name_part) {
672        return None;
673    }
674    let inner = &text[open + 1..text.len() - 1];
675    let args = split_top_level_args(inner)
676        .into_iter()
677        .map(|s| lower_expression_depth(&s, depth + 1))
678        .collect();
679    Some(Expr::Call {
680        callee: name_ref_from(name_part),
681        args,
682    })
683}
684
/// Split an argument list on its top-level commas.
///
/// A comma separates arguments only when it is outside every string literal
/// and outside every nested pair of parentheses. Pieces are returned
/// untrimmed, in source order. The piece after the last comma (or the whole
/// input when there is no comma) is dropped if it is empty or whitespace
/// only, so `""` yields no arguments and `"a,"` yields one; earlier empty
/// pieces are kept.
fn split_top_level_args(inner: &str) -> Vec<String> {
    let mut pieces: Vec<String> = Vec::new();
    let mut nesting: i32 = 0;
    let mut quoted = false;
    // Byte offset where the argument currently being scanned begins.
    let mut piece_start = 0usize;

    for (pos, ch) in inner.char_indices() {
        match ch {
            '\'' => quoted = !quoted,
            // Inside a string literal nothing is structural.
            _ if quoted => {}
            '(' => nesting += 1,
            ')' => nesting -= 1,
            ',' if nesting == 0 => {
                pieces.push(inner[piece_start..pos].to_string());
                // `,` is one byte, so the next piece starts right after it.
                piece_start = pos + 1;
            }
            _ => {}
        }
    }

    let tail = &inner[piece_start..];
    if !tail.trim().is_empty() {
        pieces.push(tail.to_string());
    }
    pieces
}
715
716fn recognise_unary(text: &str, depth: usize) -> Option<Expr> {
717    let upper = text.to_ascii_uppercase();
718    if upper.starts_with("NOT ") {
719        let len = "NOT ".len();
720        return Some(Expr::Unary {
721            op: "NOT".into(),
722            operand: Box::new(lower_expression_depth(&text[len..], depth + 1)),
723        });
724    }
725    if text.starts_with('-') || text.starts_with('+') {
726        let op = if text.starts_with('-') { "-" } else { "+" };
727        return Some(Expr::Unary {
728            op: op.into(),
729            operand: Box::new(lower_expression_depth(text[1..].trim(), depth + 1)),
730        });
731    }
732    let _ = upper;
733    None
734}
735
/// True when `text` is a plain, possibly dotted, identifier path such as
/// `emp`, `pkg.member` or `hr.emp_pkg.get$sal`.
///
/// Rules:
/// * the text starts with an ASCII letter or `_`;
/// * it consists of dot-separated segments, none of them empty — so a
///   leading dot, a doubled dot (`a..b`) and a trailing dot (`pkg.`) are all
///   rejected, and a name never produces an empty path part;
/// * every segment contains only ASCII alphanumerics, `_`, `$` and `#`.
fn is_dotted_name(text: &str) -> bool {
    let starts_like_identifier = matches!(
        text.bytes().next(),
        Some(b) if b.is_ascii_alphabetic() || b == b'_'
    );
    starts_like_identifier
        && text.split('.').all(|segment| {
            !segment.is_empty()
                && segment
                    .bytes()
                    .all(|b| b.is_ascii_alphanumeric() || matches!(b, b'_' | b'$' | b'#'))
        })
}
749
750fn name_ref_from(text: &str) -> NameRef {
751    let parts: Vec<String> = text.split('.').map(|p| p.to_ascii_uppercase()).collect();
752    NameRef {
753        parts,
754        display: text.to_string(),
755    }
756}
757
// Unit tests for expression lowering. Every test drives
// `lower_expression` with a source snippet and checks the shape of the
// returned `Expr`. Shape mismatches panic with the actual lowered value
// so a failure is diagnosable from the test output alone.
#[cfg(test)]
mod tests {
    use super::*;

    // `NULL` lowers to `Expr::Null` regardless of case.
    #[test]
    fn null_literal() {
        assert_eq!(lower_expression("NULL"), Expr::Null);
        assert_eq!(lower_expression("null"), Expr::Null);
    }

    // `TRUE` / `FALSE` lower to `Expr::BoolLit` regardless of case.
    #[test]
    fn boolean_literals() {
        assert_eq!(lower_expression("TRUE"), Expr::BoolLit(true));
        assert_eq!(lower_expression("false"), Expr::BoolLit(false));
    }

    // Integers keep their text verbatim; exponent and decimal forms
    // land as `FloatLit`.
    #[test]
    fn integer_and_float_literals() {
        assert_eq!(lower_expression("42"), Expr::IntLit("42".into()));
        assert!(matches!(lower_expression("1.5e+12"), Expr::FloatLit(_)));
        assert!(matches!(lower_expression("3.14"), Expr::FloatLit(_)));
    }

    // A doubled `''` inside a string literal collapses to a single `'`.
    #[test]
    fn string_literal_unescapes_doubled_quotes() {
        assert_eq!(
            lower_expression("'it''s fine'"),
            Expr::StringLit("it's fine".into())
        );
    }

    // oracle-4cne: non-ASCII string-literal content must survive
    // un-doubling intact. Walking bytes (`bytes[i] as char`) would
    // reinterpret each UTF-8 byte as Latin-1 and corrupt the literal.
    #[test]
    fn string_literal_preserves_non_ascii_utf8() {
        // Accented characters, a non-Latin script, and punctuation
        // that all live outside the ASCII range.
        let src = "'café — déjà vu — Москва — 日本語'";
        let expected = "café — déjà vu — Москва — 日本語";
        assert_eq!(
            lower_expression(src),
            Expr::StringLit(expected.into()),
            "non-ASCII string literal must round-trip byte-for-byte"
        );
    }

    // oracle-4cne: un-doubling must still work when doubled quotes
    // sit next to multi-byte content.
    #[test]
    fn string_literal_unescapes_doubled_quotes_with_non_ascii() {
        assert_eq!(
            lower_expression("'garçon''s café'"),
            Expr::StringLit("garçon's café".into())
        );
    }

    #[test]
    fn datetime_literals_word_boundary_safe() {
        assert!(matches!(
            lower_expression("DATE '2024-05-15'"),
            Expr::DateTimeLit { keyword, .. } if keyword == "DATE"
        ));
        // `DATE_HIRED` is NOT a date literal — falls through to Name.
        assert!(matches!(lower_expression("DATE_HIRED"), Expr::Name(_)));
    }

    // `:name` / `:1` lower to `BindRef`; `&var` / `&&var` lower to
    // `SubstitutionRef`, with `sticky` set only for the doubled form.
    #[test]
    fn bind_ref_and_substitution_ref() {
        assert_eq!(
            lower_expression(":bind_name"),
            Expr::BindRef("bind_name".into())
        );
        assert_eq!(lower_expression(":1"), Expr::BindRef("1".into()));
        assert_eq!(
            lower_expression("&var"),
            Expr::SubstitutionRef {
                name: "var".into(),
                sticky: false,
            }
        );
        assert_eq!(
            lower_expression("&&sticky"),
            Expr::SubstitutionRef {
                name: "sticky".into(),
                sticky: true,
            }
        );
    }

    // A dotted reference keeps its original text in `display` and its
    // upper-cased segments in `parts`.
    #[test]
    fn dotted_name_reference() {
        match lower_expression("hr.employees.emp_id") {
            Expr::Name(n) => {
                assert_eq!(n.parts, vec!["HR", "EMPLOYEES", "EMP_ID"]);
                assert_eq!(n.display, "hr.employees.emp_id");
            }
            other => panic!("expected Name, got {other:?}"),
        }
    }

    #[test]
    fn function_call_with_two_args() {
        match lower_expression("nvl(v_x, 0)") {
            Expr::Call { callee, args } => {
                assert_eq!(callee.parts, vec!["NVL"]);
                assert_eq!(args.len(), 2);
            }
            other => panic!("expected Call, got {other:?}"),
        }
    }

    // The comma inside the nested call must not split the outer
    // argument list.
    #[test]
    fn nested_call_arguments_preserved() {
        match lower_expression("nvl(coalesce(a, b), 0)") {
            Expr::Call { args, .. } => {
                assert_eq!(args.len(), 2);
                assert!(matches!(args[0], Expr::Call { .. }));
            }
            other => panic!("expected Call, got {other:?}"),
        }
    }

    #[test]
    fn binary_operator_low_precedence_wins() {
        match lower_expression("a AND b OR c") {
            // OR has lower precedence → top-level op is OR.
            Expr::Binary { op, .. } => assert_eq!(op, "OR"),
            other => panic!("expected Binary, got {other:?}"),
        }
    }

    #[test]
    fn string_concat_is_a_binary() {
        match lower_expression("first_name || ' ' || last_name") {
            Expr::Binary { op, .. } => assert_eq!(op, "||"),
            other => panic!("expected `||` Binary, got {other:?}"),
        }
    }

    #[test]
    fn unary_not_negates_inner() {
        match lower_expression("NOT v_flag") {
            Expr::Unary { op, operand } => {
                assert_eq!(op, "NOT");
                assert!(matches!(*operand, Expr::Name(_)));
            }
            other => panic!("expected Unary, got {other:?}"),
        }
    }

    #[test]
    fn paren_protects_inner_op_from_top_level_split() {
        // `(a OR b) AND c` — top-level op is AND, not OR.
        match lower_expression("(a OR b) AND c") {
            Expr::Binary { op, .. } => assert_eq!(op, "AND"),
            other => panic!("expected Binary, got {other:?}"),
        }
    }

    #[test]
    fn relational_two_char_ops_not_mis_split_on_embedded_equals() {
        // oracle-ajm2.10: a higher-precedence `&["="]` tier matched the `=`
        // byte inside `<=`/`>=`/`!=` before the relational tier was reached,
        // corrupting the LHS into a Raw node (`a <`) and op into `=`. Merging
        // `=` into the relational tier (2-char ops first) fixes the split.
        for (src, expected_op) in [("a <= b", "<="), ("a >= b", ">="), ("a != b", "!=")] {
            match lower_expression(src) {
                Expr::Binary { op, lhs, rhs } => {
                    assert_eq!(op, expected_op, "op for {src:?}");
                    assert!(
                        matches!(*lhs, Expr::Name(_)),
                        "lhs of {src:?} must lower to a Name, not Raw: {lhs:?}"
                    );
                    assert!(
                        matches!(*rhs, Expr::Name(_)),
                        "rhs of {src:?} must lower to a Name: {rhs:?}"
                    );
                }
                other => panic!("{src:?} should lower to Binary, got {other:?}"),
            }
        }
    }

    #[test]
    fn unaffected_comparison_ops_still_split_correctly() {
        // The relational-tier merge must not regress the operators that
        // already worked: `=`, `<`, `>`, `<>`.
        for (src, expected_op) in [
            ("a = b", "="),
            ("a < b", "<"),
            ("a > b", ">"),
            ("a <> b", "<>"),
        ] {
            match lower_expression(src) {
                Expr::Binary { op, lhs, rhs } => {
                    assert_eq!(op, expected_op, "op for {src:?}");
                    assert!(matches!(*lhs, Expr::Name(_)), "lhs of {src:?}: {lhs:?}");
                    assert!(matches!(*rhs, Expr::Name(_)), "rhs of {src:?}: {rhs:?}");
                }
                other => panic!("{src:?} should lower to Binary, got {other:?}"),
            }
        }
    }

    #[test]
    fn call_on_lhs_of_le_is_preserved_for_calls_edge() {
        // oracle-ajm2.10: with the mis-split, `compute_total(x)` on the LHS of
        // `<=` became a Raw node, dropped by collect_calls — a Calls-edge false
        // negative. After the fix the LHS lowers to a Call the extractor sees.
        match lower_expression("compute_total(x) <= 10") {
            Expr::Binary { op, lhs, .. } => {
                assert_eq!(op, "<=");
                assert!(
                    matches!(*lhs, Expr::Call { .. }),
                    "LHS must lower to a Call so the Calls-edge is emitted: {lhs:?}"
                );
            }
            other => panic!("expected Binary, got {other:?}"),
        }
    }

    #[test]
    fn datetime_lone_quote_does_not_panic() {
        // oracle-ajm2.3: `DATE'` (and the TIMESTAMP/INTERVAL/whitespace
        // variants) used to slice `&trimmed[1..0]` and panic. With the
        // `len < 2` guard the recognizer declines and lowering falls through
        // to a non-panicking Expr (Raw for these unrecognised shapes).
        for src in ["DATE'", "TIMESTAMP '", "INTERVAL '", "DATE   '"] {
            let e = lower_expression(src);
            assert!(
                !matches!(e, Expr::DateTimeLit { .. }),
                "{src:?} is not a well-formed datetime literal: {e:?}"
            );
        }
        // A well-formed literal still parses.
        assert!(matches!(
            lower_expression("DATE'2020-01-01'"),
            Expr::DateTimeLit { .. }
        ));
    }

    #[test]
    fn unrecognised_expression_lands_as_raw() {
        match lower_expression("@@@") {
            Expr::Raw { reason, .. } => {
                assert_eq!(reason, UnknownExprReason::UnrecognizedShape);
            }
            other => panic!("expected Raw, got {other:?}"),
        }
    }

    // Empty input, and input that is only whitespace around a `;`,
    // both lower to `Expr::Null`.
    #[test]
    fn empty_expression_yields_null() {
        assert_eq!(lower_expression(""), Expr::Null);
        assert_eq!(lower_expression("  ;  "), Expr::Null);
    }

    // An operator character inside quotes belongs to the literal and
    // must not trigger a binary split.
    #[test]
    fn string_with_operator_inside_does_not_split() {
        match lower_expression("'a + b'") {
            Expr::StringLit(s) => assert_eq!(s, "a + b"),
            other => panic!("expected StringLit, got {other:?}"),
        }
    }

    // ---- oracle-aqum.1: expression-lowering recursion-depth cap ----

    /// Iteratively measure the maximum nesting depth of an `Expr`
    /// tree without itself recursing (so the measurement cannot
    /// stack-overflow on the very trees we are guarding against).
    fn expr_depth(root: &Expr) -> usize {
        let mut max = 0usize;
        let mut stack: Vec<(&Expr, usize)> = vec![(root, 1)];
        while let Some((e, d)) = stack.pop() {
            if d > max {
                max = d;
            }
            match e {
                Expr::Binary { lhs, rhs, .. } => {
                    stack.push((lhs, d + 1));
                    stack.push((rhs, d + 1));
                }
                Expr::Unary { operand, .. } => stack.push((operand, d + 1)),
                Expr::Call { args, .. } => {
                    for a in args {
                        stack.push((a, d + 1));
                    }
                }
                _ => {}
            }
        }
        max
    }

    #[test]
    fn flat_binary_chain_left_fold_shape_preserved() {
        // The iterative left-fold must reproduce the same right-leaning
        // tree the old leftmost-split recursion produced:
        // `a OR b OR c` → OR(a, OR(b, c)).
        match lower_expression("a OR b OR c") {
            Expr::Binary { op, lhs, rhs } => {
                assert_eq!(op, "OR");
                assert!(matches!(*lhs, Expr::Name(_)), "outer lhs is `a`: {lhs:?}");
                match *rhs {
                    Expr::Binary {
                        op: ref iop,
                        ref lhs,
                        ref rhs,
                    } => {
                        assert_eq!(iop, "OR");
                        assert!(matches!(**lhs, Expr::Name(_)), "inner lhs is `b`: {lhs:?}");
                        assert!(matches!(**rhs, Expr::Name(_)), "inner rhs is `c`: {rhs:?}");
                    }
                    other => panic!("inner rhs should be Binary, got {other:?}"),
                }
            }
            other => panic!("expected Binary, got {other:?}"),
        }
    }

    #[test]
    fn mixed_same_tier_ops_fold_in_source_order() {
        // `a - b + c` lives in the `[+, -]` tier; the leftmost-split
        // recursion produced `-(a, +(b, c))`. The fold must keep the
        // per-position operator, not collapse them to one symbol.
        match lower_expression("a - b + c") {
            Expr::Binary { op, rhs, .. } => {
                assert_eq!(op, "-", "outer op is the leftmost `-`");
                match *rhs {
                    Expr::Binary { op: iop, .. } => assert_eq!(iop, "+"),
                    other => panic!("inner should be `+` Binary, got {other:?}"),
                }
            }
            other => panic!("expected Binary, got {other:?}"),
        }
    }

    #[test]
    fn wide_or_chain_does_not_stack_overflow_and_is_depth_bounded() {
        // oracle-aqum.1: a crafted single assignment RHS
        // `a OR a OR … OR a` with ~1,000,000 operands previously drove
        // `lower_expression` into linear-depth recursion (spine depth ==
        // operand count − 1) that overflowed the stack and aborted the
        // analyzer (SIGABRT) — an unrecoverable DoS on untrusted PL/SQL.
        // After the fix the chain is folded iteratively and the produced
        // tree is capped at `MAX_EXPR_DEPTH`, so neither lowering nor the
        // downstream tree-walk consumers can overflow.
        let n = 1_000_000usize;
        let mut chain = String::with_capacity(n * 5);
        for i in 0..n {
            if i > 0 {
                chain.push_str(" OR ");
            }
            chain.push('a');
        }
        let lowered = lower_expression(&chain);
        // Must be a Binary spine (well-formed prefix) terminating in an
        // honest depth-limit Raw, never a panic / abort.
        assert!(
            matches!(lowered, Expr::Binary { .. }),
            "wide chain should lower to a Binary spine: {lowered:?}"
        );
        let depth = expr_depth(&lowered);
        assert!(
            depth <= MAX_EXPR_DEPTH + 1,
            "produced tree depth {depth} must stay bounded by the cap \
             (MAX_EXPR_DEPTH={MAX_EXPR_DEPTH}); an unbounded spine would \
             overflow the downstream walkers"
        );
        // The deep tail must be surfaced honestly, not silently dropped.
        assert!(
            contains_depth_limit_raw(&lowered),
            "an over-deep chain must surface an ExprDepthLimit Raw (R13)"
        );
    }

    /// Iteratively search an `Expr` tree for a `Raw` node whose reason
    /// is `ExprDepthLimit`, descending through `Binary`, `Unary` and
    /// `Call` children with an explicit stack instead of recursion.
    fn contains_depth_limit_raw(root: &Expr) -> bool {
        let mut stack: Vec<&Expr> = vec![root];
        while let Some(e) = stack.pop() {
            match e {
                Expr::Raw { reason, .. } if *reason == UnknownExprReason::ExprDepthLimit => {
                    return true;
                }
                Expr::Binary { lhs, rhs, .. } => {
                    stack.push(lhs);
                    stack.push(rhs);
                }
                Expr::Unary { operand, .. } => stack.push(operand),
                Expr::Call { args, .. } => stack.extend(args.iter()),
                _ => {}
            }
        }
        false
    }

    #[test]
    fn deeply_nested_parens_degrade_to_depth_limit_not_overflow() {
        // A pathological paren spine `((((…a…))))` also drives the
        // call/unary/binary recursion; the cap must backstop it.
        let depth = MAX_EXPR_DEPTH + 50;
        let mut s = String::new();
        for _ in 0..depth {
            s.push('(');
        }
        s.push_str("a OR a");
        for _ in 0..depth {
            s.push(')');
        }
        // Wrap so the outermost parens are not stripped to a bare expr;
        // a leading `NOT ` forces the unary recursion path too.
        let src = format!("NOT {s}");
        let lowered = lower_expression(&src);
        assert!(
            expr_depth(&lowered) <= MAX_EXPR_DEPTH + 1,
            "deep paren/unary spine must stay depth-bounded: {lowered:?}"
        );
    }

    #[test]
    fn short_chain_within_budget_keeps_all_operands() {
        // A chain comfortably under the cap must NOT degrade: every
        // operand stays a real Name and no ExprDepthLimit Raw appears.
        let n = 64usize;
        let chain = vec!["x"; n].join(" OR ");
        let lowered = lower_expression(&chain);
        assert!(
            !contains_depth_limit_raw(&lowered),
            "a {n}-operand chain is well within MAX_EXPR_DEPTH and must \
             not be truncated"
        );
        // Exactly n-1 OR nodes and n Name leaves.
        let mut names = 0usize;
        let mut bins = 0usize;
        let mut stack: Vec<&Expr> = vec![&lowered];
        while let Some(e) = stack.pop() {
            match e {
                Expr::Name(_) => names += 1,
                Expr::Binary { op, lhs, rhs } => {
                    assert_eq!(op, "OR");
                    bins += 1;
                    stack.push(lhs);
                    stack.push(rhs);
                }
                other => panic!("unexpected node {other:?}"),
            }
        }
        assert_eq!(names, n, "all operands preserved");
        assert_eq!(bins, n - 1, "one OR per gap");
    }

    // oracle-hrzg.5: a bare `(p_user)` group unwraps to the inner Name
    // (it used to fall through to Raw{UnrecognizedShape}).
    #[test]
    fn paren_group_unwraps_to_inner_name() {
        match lower_expression("(p_user)") {
            Expr::Name(n) => assert_eq!(n.parts, vec!["P_USER"]),
            other => panic!("expected Name, got {other:?}"),
        }
    }

    // oracle-hrzg.5: a whole-expression group `('a' || p_user)` is
    // unwrapped first, then the inner `||` splits, so the top node is the
    // concatenation Binary — not a Raw.
    #[test]
    fn whole_expression_paren_group_unwraps_then_splits_inner_op() {
        match lower_expression("('SELECT ' || p_user)") {
            Expr::Binary { op, .. } => assert_eq!(op, "||"),
            other => panic!("expected `||` Binary, got {other:?}"),
        }
    }

    // oracle-hrzg.5: a parenthesised CALL operand `(compute(x))` unwraps
    // to the inner Call (the call recogniser bailed on a bare `(...)`).
    #[test]
    fn paren_group_unwraps_to_inner_call() {
        match lower_expression("(compute(x))") {
            Expr::Call { callee, .. } => assert_eq!(callee.parts, vec!["COMPUTE"]),
            other => panic!("expected Call, got {other:?}"),
        }
    }

    // oracle-hrzg.5: `(a) + (b)` is NOT a single group — the leading `(`
    // closes at the first `)` (mid-text), so the recognizer must return
    // None and let the `+` split run. Mis-stripping it would corrupt the
    // parse.
    #[test]
    fn two_adjacent_groups_are_not_stripped_as_one() {
        match lower_expression("(a) + (b)") {
            Expr::Binary { op, lhs, rhs } => {
                assert_eq!(op, "+");
                assert!(matches!(*lhs, Expr::Name(_)), "lhs `(a)` → Name: {lhs:?}");
                assert!(matches!(*rhs, Expr::Name(_)), "rhs `(b)` → Name: {rhs:?}");
            }
            other => panic!("expected `+` Binary over two Names, got {other:?}"),
        }
    }

    // oracle-hrzg.5: an expression that starts with `(` and ends with
    // `)` but whose parens never balance (`((a)` — depth never returns
    // to 0) degrades to the typed `UnbalancedParens` reason rather than
    // mis-stripping a non-matching pair.
    #[test]
    fn unbalanced_paren_group_degrades_to_typed_reason() {
        match lower_expression("((a)") {
            Expr::Raw { reason, .. } => {
                assert_eq!(reason, UnknownExprReason::UnbalancedParens)
            }
            other => panic!("expected UnbalancedParens Raw, got {other:?}"),
        }
    }
}