Skip to main content

jetro_core/
analysis.rs

1//! Static analysis passes over compiled `Program` IR.
2//!
3//! Forward-flow analyses that produce *abstract domains* for each opcode
4//! position.  Callers (compiler, planner, caller code) can use these to:
5//!
6//! - emit specialised opcodes where a value's type / nullness / cardinality
7//!   is statically known,
8//! - reject ill-typed expressions at compile time,
9//! - enable further peephole passes that require type awareness.
10//!
11//! The analyses run on the flat `Arc<[Opcode]>` IR and are intentionally
12//! conservative — when uncertain, they return the `Unknown` top element of
13//! the lattice.
14
15use std::sync::Arc;
16use std::collections::HashMap;
17
18use super::vm::{Program, Opcode, BuiltinMethod};
19use super::ast::KindType;
20
21// ── Type lattice ──────────────────────────────────────────────────────────────
22
23#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
24pub enum VType {
25    /// Bottom: no value (unreachable).
26    Bottom,
27    Null,
28    Bool,
29    Int,
30    Float,
31    /// Numeric — Int or Float (partially refined).
32    Num,
33    Str,
34    Arr,
35    Obj,
36    /// Top: any type possible.
37    Unknown,
38}
39
40impl VType {
41    /// Least upper bound (join) for the lattice.
42    pub fn join(self, other: VType) -> VType {
43        if self == other { return self; }
44        match (self, other) {
45            (VType::Bottom, x) | (x, VType::Bottom) => x,
46            (VType::Int, VType::Float) | (VType::Float, VType::Int)
47                | (VType::Int, VType::Num) | (VType::Num, VType::Int)
48                | (VType::Float, VType::Num) | (VType::Num, VType::Float)
49                => VType::Num,
50            _ => VType::Unknown,
51        }
52    }
53
54    pub fn is_array_like(self) -> bool { matches!(self, VType::Arr) }
55    pub fn is_object_like(self) -> bool { matches!(self, VType::Obj) }
56    pub fn is_numeric(self) -> bool { matches!(self, VType::Int | VType::Float | VType::Num) }
57    pub fn is_string(self) -> bool { matches!(self, VType::Str) }
58}
59
60// ── Null-ness lattice ─────────────────────────────────────────────────────────
61
62#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
63pub enum Nullness {
64    /// Proven null.
65    AlwaysNull,
66    /// Proven non-null.
67    NonNull,
68    /// May or may not be null.
69    MaybeNull,
70}
71
72impl Nullness {
73    pub fn join(self, other: Nullness) -> Nullness {
74        if self == other { return self; }
75        Nullness::MaybeNull
76    }
77}
78
79// ── Cardinality lattice ───────────────────────────────────────────────────────
80
81#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
82pub enum Cardinality {
83    /// Exactly 0 elements (empty).
84    Zero,
85    /// Exactly 1 element (scalar-wrapped or unwrapped).
86    One,
87    /// 0 or 1 (e.g. result of `?` quantifier).
88    ZeroOrOne,
89    /// Multiple elements possible.
90    Many,
91    /// Not an array (scalar domain).
92    NotArray,
93    /// Unknown shape.
94    Unknown,
95}
96
97impl Cardinality {
98    pub fn join(self, other: Cardinality) -> Cardinality {
99        if self == other { return self; }
100        match (self, other) {
101            (Cardinality::Zero, Cardinality::One)
102                | (Cardinality::One, Cardinality::Zero) => Cardinality::ZeroOrOne,
103            _ => Cardinality::Unknown,
104        }
105    }
106}
107
108// ── Abstract value ────────────────────────────────────────────────────────────
109
110#[derive(Debug, Clone, Copy, PartialEq, Eq)]
111pub struct AbstractVal {
112    pub ty:   VType,
113    pub null: Nullness,
114    pub card: Cardinality,
115}
116
117impl AbstractVal {
118    pub const UNKNOWN: Self = Self {
119        ty: VType::Unknown, null: Nullness::MaybeNull, card: Cardinality::Unknown,
120    };
121    pub const NULL: Self = Self {
122        ty: VType::Null, null: Nullness::AlwaysNull, card: Cardinality::NotArray,
123    };
124    pub fn scalar(ty: VType) -> Self {
125        Self { ty, null: Nullness::NonNull, card: Cardinality::NotArray }
126    }
127    pub fn array() -> Self {
128        Self { ty: VType::Arr, null: Nullness::NonNull, card: Cardinality::Many }
129    }
130    pub fn object() -> Self {
131        Self { ty: VType::Obj, null: Nullness::NonNull, card: Cardinality::NotArray }
132    }
133    pub fn join(self, other: AbstractVal) -> AbstractVal {
134        AbstractVal {
135            ty:   self.ty.join(other.ty),
136            null: self.null.join(other.null),
137            card: self.card.join(other.card),
138        }
139    }
140}
141
142// ── Forward type inference ────────────────────────────────────────────────────
143
144/// Walk opcodes of `program` forward, simulating a stack of `AbstractVal`s.
145/// Returns the top-of-stack abstract value at program end (i.e. the result type).
146pub fn infer_result_type(program: &Program) -> AbstractVal {
147    let mut stack: Vec<AbstractVal> = Vec::with_capacity(16);
148    let mut env: HashMap<Arc<str>, AbstractVal> = HashMap::new();
149    for op in program.ops.iter() { apply_op_env(op, &mut stack, &mut env); }
150    stack.pop().unwrap_or(AbstractVal::UNKNOWN)
151}
152
153/// Same as `infer_result_type` but exposes the bindings environment after
154/// the program finishes — useful for debugging the interprocedural flow.
155pub fn infer_result_type_with_env(program: &Program)
156    -> (AbstractVal, HashMap<Arc<str>, AbstractVal>)
157{
158    let mut stack: Vec<AbstractVal> = Vec::with_capacity(16);
159    let mut env: HashMap<Arc<str>, AbstractVal> = HashMap::new();
160    for op in program.ops.iter() { apply_op_env(op, &mut stack, &mut env); }
161    (stack.pop().unwrap_or(AbstractVal::UNKNOWN), env)
162}
163
164fn apply_op_env(op: &Opcode, stack: &mut Vec<AbstractVal>,
165                env: &mut HashMap<Arc<str>, AbstractVal>) {
166    // Handle binding & ident lookup here, delegate scalar cases to apply_op.
167    match op {
168        Opcode::LoadIdent(name) => {
169            let av = env.get(name).copied().unwrap_or(AbstractVal::UNKNOWN);
170            stack.push(av);
171        }
172        Opcode::BindVar(name) => {
173            // TOS preserved; record type.
174            if let Some(top) = stack.last().copied() { env.insert(name.clone(), top); }
175        }
176        Opcode::StoreVar(name) => {
177            let v = stack.pop().unwrap_or(AbstractVal::UNKNOWN);
178            env.insert(name.clone(), v);
179        }
180        Opcode::LetExpr { name, body } => {
181            let init = stack.pop().unwrap_or(AbstractVal::UNKNOWN);
182            let saved = env.get(name).copied();
183            env.insert(name.clone(), init);
184            let mut sub_stack: Vec<AbstractVal> = Vec::with_capacity(8);
185            for op2 in body.ops.iter() { apply_op_env(op2, &mut sub_stack, env); }
186            let res = sub_stack.pop().unwrap_or(AbstractVal::UNKNOWN);
187            // Restore shadowed binding.
188            match saved {
189                Some(v) => { env.insert(name.clone(), v); }
190                None    => { env.remove(name); }
191            }
192            stack.push(res);
193        }
194        _ => apply_op(op, stack),
195    }
196}
197
198fn apply_op(op: &Opcode, stack: &mut Vec<AbstractVal>) {
199    macro_rules! pop2 { () => {{ let r = stack.pop().unwrap_or(AbstractVal::UNKNOWN); let l = stack.pop().unwrap_or(AbstractVal::UNKNOWN); (l, r) }} }
200    macro_rules! pop1 { () => { stack.pop().unwrap_or(AbstractVal::UNKNOWN) } }
201    match op {
202        Opcode::PushNull      => stack.push(AbstractVal::NULL),
203        Opcode::PushBool(_)   => stack.push(AbstractVal::scalar(VType::Bool)),
204        Opcode::PushInt(_)    => stack.push(AbstractVal::scalar(VType::Int)),
205        Opcode::PushFloat(_)  => stack.push(AbstractVal::scalar(VType::Float)),
206        Opcode::PushStr(_)    => stack.push(AbstractVal::scalar(VType::Str)),
207        Opcode::PushRoot | Opcode::PushCurrent | Opcode::LoadIdent(_)
208                              => stack.push(AbstractVal::UNKNOWN),
209        Opcode::GetField(_) | Opcode::OptField(_) => {
210            pop1!();
211            stack.push(AbstractVal::UNKNOWN);
212        }
213        Opcode::GetIndex(_) | Opcode::DynIndex(_) => {
214            pop1!();
215            stack.push(AbstractVal::UNKNOWN);
216        }
217        Opcode::GetSlice(_, _) => {
218            pop1!();
219            stack.push(AbstractVal::array());
220        }
221        Opcode::Descendant(_) | Opcode::DescendAll => {
222            pop1!();
223            stack.push(AbstractVal::array());
224        }
225        Opcode::InlineFilter(_) => {
226            pop1!();
227            stack.push(AbstractVal::array());
228        }
229        Opcode::Quantifier(kind) => {
230            pop1!();
231            use super::ast::QuantifierKind;
232            let card = match kind {
233                QuantifierKind::First => Cardinality::ZeroOrOne,
234                QuantifierKind::One   => Cardinality::One,
235            };
236            stack.push(AbstractVal { ty: VType::Unknown, null: Nullness::MaybeNull, card });
237        }
238        Opcode::RootChain(_) => stack.push(AbstractVal::UNKNOWN),
239        Opcode::FilterCount(_) => {
240            pop1!();
241            stack.push(AbstractVal::scalar(VType::Int));
242        }
243        Opcode::FindFirst(_) | Opcode::FindOne(_) => {
244            pop1!();
245            stack.push(AbstractVal::UNKNOWN);
246        }
247        Opcode::FilterMap { .. } | Opcode::FilterFilter { .. } | Opcode::MapMap { .. } => {
248            pop1!();
249            stack.push(AbstractVal::array());
250        }
251        Opcode::MapSum(_) => {
252            pop1!();
253            stack.push(AbstractVal::scalar(VType::Num));
254        }
255        Opcode::MapAvg(_) => {
256            pop1!();
257            stack.push(AbstractVal::scalar(VType::Float));
258        }
259        Opcode::TopN { .. } | Opcode::MapFlatten(_) | Opcode::FilterTakeWhile { .. }
260            | Opcode::FilterDropWhile { .. } | Opcode::MapUnique(_)
261            | Opcode::EquiJoin { .. } => {
262            pop1!();
263            stack.push(AbstractVal::array());
264        }
265        Opcode::Add | Opcode::Sub | Opcode::Mul | Opcode::Mod => {
266            let (l, r) = pop2!();
267            stack.push(AbstractVal::scalar(l.ty.join(r.ty)));
268        }
269        Opcode::Div => {
270            pop2!();
271            stack.push(AbstractVal::scalar(VType::Float));
272        }
273        Opcode::Eq | Opcode::Neq | Opcode::Lt | Opcode::Lte
274            | Opcode::Gt | Opcode::Gte | Opcode::Fuzzy => {
275            pop2!();
276            stack.push(AbstractVal::scalar(VType::Bool));
277        }
278        Opcode::Not => { pop1!(); stack.push(AbstractVal::scalar(VType::Bool)); }
279        Opcode::Neg => { let v = pop1!(); stack.push(AbstractVal::scalar(v.ty)); }
280        Opcode::AndOp(_) | Opcode::OrOp(_) => {
281            pop1!();
282            stack.push(AbstractVal::scalar(VType::Bool));
283        }
284        Opcode::CoalesceOp(_) => { pop1!(); stack.push(AbstractVal::UNKNOWN); }
285        Opcode::CallMethod(call) | Opcode::CallOptMethod(call) => {
286            pop1!();
287            stack.push(method_result_type(call.method));
288        }
289        Opcode::MakeObj(_) => stack.push(AbstractVal::object()),
290        Opcode::MakeArr(_) => stack.push(AbstractVal::array()),
291        Opcode::FString(_) => stack.push(AbstractVal::scalar(VType::Str)),
292        Opcode::KindCheck { .. } => { pop1!(); stack.push(AbstractVal::scalar(VType::Bool)); }
293        Opcode::SetCurrent => {} // TOS stays as current
294        Opcode::BindVar(_) => {} // TOS preserved
295        Opcode::StoreVar(_) => { pop1!(); }
296        Opcode::BindObjDestructure(_) | Opcode::BindArrDestructure(_) => {} // TOS preserved
297        Opcode::LetExpr { .. } => {
298            pop1!();
299            stack.push(AbstractVal::UNKNOWN);
300        }
301        Opcode::ListComp(_) | Opcode::SetComp(_) => stack.push(AbstractVal::array()),
302        Opcode::DictComp(_) => stack.push(AbstractVal::object()),
303        Opcode::GetPointer(_) => stack.push(AbstractVal::UNKNOWN),
304        Opcode::PatchEval(_) => stack.push(AbstractVal::UNKNOWN),
305        Opcode::CastOp(ty) => {
306            pop1!();
307            use super::ast::CastType;
308            let av = match ty {
309                CastType::Int    => AbstractVal::scalar(VType::Int),
310                CastType::Float  => AbstractVal::scalar(VType::Float),
311                CastType::Number => AbstractVal::scalar(VType::Num),
312                CastType::Str    => AbstractVal::scalar(VType::Str),
313                CastType::Bool   => AbstractVal::scalar(VType::Bool),
314                CastType::Array  => AbstractVal::array(),
315                CastType::Object => AbstractVal::object(),
316                CastType::Null   => AbstractVal::NULL,
317            };
318            stack.push(av);
319        }
320    }
321}
322
323/// Static result-type mapping for builtin methods (conservative).
324pub fn method_result_type(m: BuiltinMethod) -> AbstractVal {
325    use BuiltinMethod::*;
326    match m {
327        // → Int
328        Len | Count | Sum | IndexOf | LastIndexOf => AbstractVal::scalar(VType::Int),
329        // → Bool
330        Any | All | Has | Missing | Includes | StartsWith | EndsWith => AbstractVal::scalar(VType::Bool),
331        // → Str
332        Upper | Lower | Capitalize | TitleCase | Trim | TrimLeft | TrimRight
333            | ToString | ToJson | ToBase64 | FromBase64 | UrlEncode | UrlDecode
334            | HtmlEscape | HtmlUnescape | Repeat | PadLeft | PadRight
335            | Replace | ReplaceAll | StripPrefix | StripSuffix | Indent | Dedent
336            | Join | ToCsv | ToTsv | Type
337            => AbstractVal::scalar(VType::Str),
338        // → Float
339        Avg => AbstractVal::scalar(VType::Float),
340        // → Num (Min/Max depend on input; treat as unknown scalar)
341        Min | Max | ToNumber => AbstractVal::scalar(VType::Num),
342        // → Bool (to_bool)
343        ToBool => AbstractVal::scalar(VType::Bool),
344        // → Arr
345        Keys | Values | Entries | ToPairs | Reverse | Unique | Flatten | Compact
346            | Chars | Lines | Words | Split | Sort | Filter | Map | FlatMap
347            | Enumerate | Pairwise | Window | Chunk | TakeWhile | DropWhile
348            | Accumulate | Zip | ZipLongest | Diff | Intersect | Union
349            | Append | Prepend | Remove | Matches | Scan | Slice
350            => AbstractVal::array(),
351        // → Obj
352        FromPairs | Invert | Pick | Omit | Merge | DeepMerge | Defaults | Rename
353            | TransformKeys | TransformValues | FilterKeys | FilterValues | Pivot
354            | GroupBy | CountBy | IndexBy | Partition | FlattenKeys | UnflattenKeys
355            | SetPath | DelPath | DelPaths | Set | Update
356            => AbstractVal::object(),
357        // → various
358        First | Last | Nth | GetPath => AbstractVal::UNKNOWN,
359        HasPath => AbstractVal::scalar(VType::Bool),
360        FromJson | Or => AbstractVal::UNKNOWN,
361        EquiJoin => AbstractVal::array(),
362        Unknown => AbstractVal::UNKNOWN,
363    }
364}
365
366// ── Alias / use-count analysis ────────────────────────────────────────────────
367
368/// Count `LoadIdent(name)` references across an entire program (including sub-programs).
369pub fn count_ident_uses(program: &Program, name: &str) -> usize {
370    let mut n = 0;
371    count_ident_uses_in_ops(&program.ops, name, &mut n);
372    n
373}
374
375fn count_ident_uses_in_ops(ops: &[Opcode], name: &str, acc: &mut usize) {
376    for op in ops {
377        match op {
378            Opcode::LoadIdent(s) if s.as_ref() == name => *acc += 1,
379            Opcode::AndOp(p) | Opcode::OrOp(p) | Opcode::CoalesceOp(p)
380                | Opcode::InlineFilter(p) | Opcode::FilterCount(p)
381                | Opcode::FindFirst(p) | Opcode::FindOne(p)
382                | Opcode::DynIndex(p)
383                | Opcode::MapSum(p) | Opcode::MapAvg(p)
384                | Opcode::MapFlatten(p) => count_ident_uses_in_ops(&p.ops, name, acc),
385            Opcode::FilterTakeWhile { pred, stop } => {
386                count_ident_uses_in_ops(&pred.ops, name, acc);
387                count_ident_uses_in_ops(&stop.ops, name, acc);
388            }
389            Opcode::FilterMap { pred, map } => {
390                count_ident_uses_in_ops(&pred.ops, name, acc);
391                count_ident_uses_in_ops(&map.ops, name, acc);
392            }
393            Opcode::FilterFilter { p1, p2 } => {
394                count_ident_uses_in_ops(&p1.ops, name, acc);
395                count_ident_uses_in_ops(&p2.ops, name, acc);
396            }
397            Opcode::MapMap { f1, f2 } => {
398                count_ident_uses_in_ops(&f1.ops, name, acc);
399                count_ident_uses_in_ops(&f2.ops, name, acc);
400            }
401            Opcode::CallMethod(c) | Opcode::CallOptMethod(c) => {
402                for p in c.sub_progs.iter() { count_ident_uses_in_ops(&p.ops, name, acc); }
403            }
404            Opcode::LetExpr { body, .. } => count_ident_uses_in_ops(&body.ops, name, acc),
405            Opcode::ListComp(spec) | Opcode::SetComp(spec) => {
406                count_ident_uses_in_ops(&spec.expr.ops, name, acc);
407                count_ident_uses_in_ops(&spec.iter.ops, name, acc);
408                if let Some(c) = &spec.cond { count_ident_uses_in_ops(&c.ops, name, acc); }
409            }
410            Opcode::DictComp(spec) => {
411                count_ident_uses_in_ops(&spec.key.ops, name, acc);
412                count_ident_uses_in_ops(&spec.val.ops, name, acc);
413                count_ident_uses_in_ops(&spec.iter.ops, name, acc);
414                if let Some(c) = &spec.cond { count_ident_uses_in_ops(&c.ops, name, acc); }
415            }
416            Opcode::MakeObj(entries) => {
417                use super::vm::CompiledObjEntry;
418                for e in entries.iter() {
419                    match e {
420                        CompiledObjEntry::Short(_) => {}
421                        CompiledObjEntry::Kv { prog, cond, .. } => {
422                            count_ident_uses_in_ops(&prog.ops, name, acc);
423                            if let Some(c) = cond { count_ident_uses_in_ops(&c.ops, name, acc); }
424                        }
425                        CompiledObjEntry::Dynamic { key, val } => {
426                            count_ident_uses_in_ops(&key.ops, name, acc);
427                            count_ident_uses_in_ops(&val.ops, name, acc);
428                        }
429                        CompiledObjEntry::Spread(p) => count_ident_uses_in_ops(&p.ops, name, acc),
430                        CompiledObjEntry::SpreadDeep(p) => count_ident_uses_in_ops(&p.ops, name, acc),
431                    }
432                }
433            }
434            Opcode::MakeArr(progs) => {
435                for p in progs.iter() { count_ident_uses_in_ops(&p.ops, name, acc); }
436            }
437            Opcode::FString(parts) => {
438                use super::vm::CompiledFSPart;
439                for p in parts.iter() {
440                    if let CompiledFSPart::Interp { prog, .. } = p {
441                        count_ident_uses_in_ops(&prog.ops, name, acc);
442                    }
443                }
444            }
445            _ => {}
446        }
447    }
448}
449
450// ── Projection set (field access pattern) ─────────────────────────────────────
451
452/// Collect all field names directly accessed from a program.  Used by
453/// projection-pushdown analysis: if an object is later accessed only via these
454/// fields, other fields can be trimmed early.
455pub fn collect_accessed_fields(program: &Program) -> Vec<Arc<str>> {
456    let mut set = Vec::new();
457    collect_fields_in_ops(&program.ops, &mut set);
458    set
459}
460
461fn collect_fields_in_ops(ops: &[Opcode], acc: &mut Vec<Arc<str>>) {
462    for op in ops {
463        match op {
464            Opcode::GetField(k) | Opcode::OptField(k) | Opcode::Descendant(k) => {
465                if !acc.iter().any(|a: &Arc<str>| a == k) { acc.push(k.clone()); }
466            }
467            Opcode::RootChain(chain) => {
468                for k in chain.iter() {
469                    if !acc.iter().any(|a: &Arc<str>| a == k) { acc.push(k.clone()); }
470                }
471            }
472            Opcode::AndOp(p) | Opcode::OrOp(p) | Opcode::CoalesceOp(p)
473                | Opcode::InlineFilter(p) | Opcode::FilterCount(p)
474                | Opcode::FindFirst(p) | Opcode::FindOne(p)
475                | Opcode::DynIndex(p)
476                | Opcode::MapSum(p) | Opcode::MapAvg(p)
477                | Opcode::MapFlatten(p) => collect_fields_in_ops(&p.ops, acc),
478            Opcode::FilterTakeWhile { pred, stop } => {
479                collect_fields_in_ops(&pred.ops, acc);
480                collect_fields_in_ops(&stop.ops, acc);
481            }
482            Opcode::FilterMap { pred, map } => {
483                collect_fields_in_ops(&pred.ops, acc);
484                collect_fields_in_ops(&map.ops, acc);
485            }
486            Opcode::FilterFilter { p1, p2 } => {
487                collect_fields_in_ops(&p1.ops, acc);
488                collect_fields_in_ops(&p2.ops, acc);
489            }
490            Opcode::MapMap { f1, f2 } => {
491                collect_fields_in_ops(&f1.ops, acc);
492                collect_fields_in_ops(&f2.ops, acc);
493            }
494            Opcode::CallMethod(c) | Opcode::CallOptMethod(c) => {
495                for p in c.sub_progs.iter() { collect_fields_in_ops(&p.ops, acc); }
496            }
497            Opcode::LetExpr { body, .. } => collect_fields_in_ops(&body.ops, acc),
498            _ => {}
499        }
500    }
501}
502
503// ── Structural opcode hashing (for CSE) ───────────────────────────────────────
504
505/// Hash a program's opcode sequence into a stable identifier.  Two programs
506/// with the same opcodes hash to the same value — enables CSE across `let`
507/// initialisers or parallel `->` binds.
508pub fn program_signature(program: &Program) -> u64 {
509    use std::collections::hash_map::DefaultHasher;
510    use std::hash::Hasher;
511    let mut h = DefaultHasher::new();
512    hash_ops(&program.ops, &mut h);
513    h.finish()
514}
515
516fn hash_ops(ops: &[Opcode], h: &mut impl std::hash::Hasher) {
517    use std::hash::Hash;
518    for op in ops {
519        // Discriminant + data for stable signature.
520        std::mem::discriminant(op).hash(h);
521        match op {
522            Opcode::PushInt(n) => n.hash(h),
523            Opcode::PushStr(s) => s.as_bytes().hash(h),
524            Opcode::PushBool(b) => b.hash(h),
525            Opcode::GetField(k) | Opcode::OptField(k) | Opcode::Descendant(k)
526                | Opcode::LoadIdent(k) | Opcode::GetPointer(k)
527                => k.as_bytes().hash(h),
528            Opcode::GetIndex(i) => i.hash(h),
529            Opcode::CallMethod(c) | Opcode::CallOptMethod(c) => {
530                (c.method as u8).hash(h);
531                for p in c.sub_progs.iter() { hash_ops(&p.ops, h); }
532            }
533            Opcode::AndOp(p) | Opcode::OrOp(p) | Opcode::CoalesceOp(p)
534                | Opcode::InlineFilter(p) | Opcode::FilterCount(p)
535                | Opcode::FindFirst(p) | Opcode::FindOne(p)
536                | Opcode::DynIndex(p)
537                | Opcode::MapSum(p) | Opcode::MapAvg(p)
538                | Opcode::MapFlatten(p) => hash_ops(&p.ops, h),
539            Opcode::FilterTakeWhile { pred, stop } => {
540                hash_ops(&pred.ops, h);
541                hash_ops(&stop.ops, h);
542            }
543            Opcode::RootChain(chain) => {
544                for k in chain.iter() { k.as_bytes().hash(h); }
545            }
546            _ => {}
547        }
548    }
549}
550
551// ── Common-subexpression detection ────────────────────────────────────────────
552
553/// Find sub-programs (via `Arc<Program>` pointers inside opcodes) that appear
554/// multiple times across the program tree.  Returns a map of
555/// `signature → count` for analysis / potential reuse.
556pub fn find_common_subexprs(program: &Program) -> HashMap<u64, usize> {
557    let mut map: HashMap<u64, usize> = HashMap::new();
558    walk_subprograms(&program.ops, &mut map);
559    map.retain(|_, &mut n| n >= 2);
560    map
561}
562
563fn walk_subprograms(ops: &[Opcode], map: &mut HashMap<u64, usize>) {
564    for op in ops {
565        let sub_progs: Vec<&Arc<Program>> = match op {
566            Opcode::AndOp(p) | Opcode::OrOp(p) | Opcode::CoalesceOp(p)
567                | Opcode::InlineFilter(p) | Opcode::FilterCount(p)
568                | Opcode::FindFirst(p) | Opcode::FindOne(p)
569                | Opcode::DynIndex(p)
570                | Opcode::MapSum(p) | Opcode::MapAvg(p)
571                | Opcode::MapFlatten(p) => vec![p],
572            Opcode::FilterTakeWhile { pred, stop } => vec![pred, stop],
573            Opcode::FilterMap { pred, map: m } => vec![pred, m],
574            Opcode::FilterFilter { p1, p2 } => vec![p1, p2],
575            Opcode::MapMap { f1, f2 } => vec![f1, f2],
576            Opcode::CallMethod(c) | Opcode::CallOptMethod(c) =>
577                c.sub_progs.iter().collect(),
578            Opcode::LetExpr { body, .. } => vec![body],
579            Opcode::MakeArr(progs) => progs.iter().collect(),
580            Opcode::MakeObj(entries) => {
581                use super::vm::CompiledObjEntry;
582                let mut v = Vec::new();
583                for e in entries.iter() {
584                    match e {
585                        CompiledObjEntry::Short(_) => {}
586                        CompiledObjEntry::Kv { prog, cond, .. } => {
587                            v.push(prog);
588                            if let Some(c) = cond { v.push(c); }
589                        }
590                        CompiledObjEntry::Dynamic { key, val } => { v.push(key); v.push(val); }
591                        CompiledObjEntry::Spread(p) => v.push(p),
592                        CompiledObjEntry::SpreadDeep(p) => v.push(p),
593                    }
594                }
595                v
596            }
597            _ => continue,
598        };
599        for p in sub_progs {
600            let sig = program_signature(p);
601            *map.entry(sig).or_insert(0) += 1;
602            walk_subprograms(&p.ops, map);
603        }
604    }
605}
606
607// ── AST-level ident use walker (for dead-let) ────────────────────────────────
608
609/// True if any sub-expression references `name` as a bare identifier.
610/// Walks the AST without compiling.  Shadowing by inner `let` / `lambda` /
611/// comprehension binders is respected: inner bindings with the same name
612/// hide the outer one.
613pub fn expr_uses_ident(expr: &super::ast::Expr, name: &str) -> bool {
614    use super::ast::{Expr, Step, PipeStep, BindTarget, FStringPart, ArrayElem, ObjField, Arg};
615    match expr {
616        Expr::Ident(n) => n == name,
617        Expr::Null | Expr::Bool(_) | Expr::Int(_) | Expr::Float(_)
618            | Expr::Str(_) | Expr::Root | Expr::Current => false,
619        Expr::FString(parts) => parts.iter().any(|p| match p {
620            FStringPart::Lit(_) => false,
621            FStringPart::Interp { expr, .. } => expr_uses_ident(expr, name),
622        }),
623        Expr::Chain(base, steps) => {
624            if expr_uses_ident(base, name) { return true; }
625            steps.iter().any(|s| match s {
626                Step::DynIndex(e) | Step::InlineFilter(e) => expr_uses_ident(e, name),
627                Step::Method(_, args) | Step::OptMethod(_, args) =>
628                    args.iter().any(|a| match a {
629                        Arg::Pos(e) | Arg::Named(_, e) => expr_uses_ident(e, name),
630                    }),
631                _ => false,
632            })
633        }
634        Expr::BinOp(l, _, r) => expr_uses_ident(l, name) || expr_uses_ident(r, name),
635        Expr::UnaryNeg(e) | Expr::Not(e) => expr_uses_ident(e, name),
636        Expr::Kind { expr, .. } => expr_uses_ident(expr, name),
637        Expr::Coalesce(l, r) => expr_uses_ident(l, name) || expr_uses_ident(r, name),
638        Expr::Object(fields) => fields.iter().any(|f| match f {
639            ObjField::Kv { val, cond, .. } =>
640                expr_uses_ident(val, name)
641                || cond.as_ref().map_or(false, |c| expr_uses_ident(c, name)),
642            ObjField::Short(n) => n == name,
643            ObjField::Dynamic { key, val } =>
644                expr_uses_ident(key, name) || expr_uses_ident(val, name),
645            ObjField::Spread(e) => expr_uses_ident(e, name),
646            ObjField::SpreadDeep(e) => expr_uses_ident(e, name),
647        }),
648        Expr::Array(elems) => elems.iter().any(|e| match e {
649            ArrayElem::Expr(e) | ArrayElem::Spread(e) => expr_uses_ident(e, name),
650        }),
651        Expr::Pipeline { base, steps } => {
652            if expr_uses_ident(base, name) { return true; }
653            steps.iter().any(|s| match s {
654                PipeStep::Forward(e) => expr_uses_ident(e, name),
655                PipeStep::Bind(bt) => match bt {
656                    BindTarget::Name(n) => n == name,
657                    BindTarget::Obj { fields, rest } =>
658                        fields.iter().any(|f| f == name)
659                        || rest.as_ref().map_or(false, |r| r == name),
660                    BindTarget::Arr(ns) => ns.iter().any(|n| n == name),
661                },
662            })
663        }
664        Expr::ListComp { expr, vars, iter, cond }
665        | Expr::SetComp  { expr, vars, iter, cond }
666        | Expr::GenComp  { expr, vars, iter, cond } => {
667            if expr_uses_ident(iter, name) { return true; }
668            if vars.iter().any(|v| v == name) { return false; } // shadowed
669            expr_uses_ident(expr, name)
670                || cond.as_ref().map_or(false, |c| expr_uses_ident(c, name))
671        }
672        Expr::DictComp { key, val, vars, iter, cond } => {
673            if expr_uses_ident(iter, name) { return true; }
674            if vars.iter().any(|v| v == name) { return false; }
675            expr_uses_ident(key, name) || expr_uses_ident(val, name)
676                || cond.as_ref().map_or(false, |c| expr_uses_ident(c, name))
677        }
678        Expr::Lambda { params, body } => {
679            if params.iter().any(|p| p == name) { return false; }
680            expr_uses_ident(body, name)
681        }
682        Expr::Let { name: n, init, body } => {
683            if expr_uses_ident(init, name) { return true; }
684            if n == name { return false; } // inner let shadows
685            expr_uses_ident(body, name)
686        }
687        Expr::GlobalCall { args, .. } => args.iter().any(|a| match a {
688            Arg::Pos(e) | Arg::Named(_, e) => expr_uses_ident(e, name),
689        }),
690        Expr::Cast { expr, .. } => expr_uses_ident(expr, name),
691        Expr::Patch { root, ops } => {
692            use super::ast::PathStep;
693            if expr_uses_ident(root, name) { return true; }
694            ops.iter().any(|op| {
695                op.path.iter().any(|s| match s {
696                    PathStep::WildcardFilter(e) => expr_uses_ident(e, name),
697                    _ => false,
698                })
699                || expr_uses_ident(&op.val, name)
700                || op.cond.as_ref().map_or(false, |c| expr_uses_ident(c, name))
701            })
702        }
703        Expr::DeleteMark => false,
704    }
705}
706
707/// True if the expression is pure — no side-effecting global calls or
708/// unknown methods.  Enables dropping unused `let` initialisers safely.
709pub fn expr_is_pure(expr: &super::ast::Expr) -> bool {
710    use super::ast::{Expr, Step, Arg};
711    match expr {
712        Expr::Null | Expr::Bool(_) | Expr::Int(_) | Expr::Float(_)
713            | Expr::Str(_) | Expr::Root | Expr::Current | Expr::Ident(_) => true,
714        Expr::FString(_) => true,
715        Expr::Chain(base, steps) => {
716            if !expr_is_pure(base) { return false; }
717            steps.iter().all(|s| match s {
718                Step::DynIndex(e) | Step::InlineFilter(e) => expr_is_pure(e),
719                Step::Method(_, args) | Step::OptMethod(_, args) =>
720                    args.iter().all(|a| match a {
721                        Arg::Pos(e) | Arg::Named(_, e) => expr_is_pure(e),
722                    }),
723                _ => true,
724            })
725        }
726        Expr::BinOp(l, _, r) | Expr::Coalesce(l, r) =>
727            expr_is_pure(l) && expr_is_pure(r),
728        Expr::UnaryNeg(e) | Expr::Not(e) | Expr::Kind { expr: e, .. } =>
729            expr_is_pure(e),
730        // All Jetro exprs are pure in practice; global calls may throw but no side effects.
731        _ => true,
732    }
733}
734
735// ── CSE: canonicalise identical sub-programs ─────────────────────────────────
736
737/// Walk `program` and replace every `Arc<Program>` inside opcodes with a
738/// canonical `Arc` keyed by `program_signature`.  Structurally-identical
739/// sub-programs end up pointing at the same allocation, reducing memory
740/// and enabling downstream caches to hit on the same key.
741///
742/// Returns a new `Program` with deduplicated sub-programs.
743pub fn dedup_subprograms(program: &Program) -> Arc<Program> {
744    let mut cache: HashMap<u64, Arc<Program>> = HashMap::new();
745    dedup_rec(program, &mut cache)
746}
747
748fn dedup_rec(program: &Program, cache: &mut HashMap<u64, Arc<Program>>) -> Arc<Program> {
749    let sig = program_signature(program);
750    if let Some(a) = cache.get(&sig) { return Arc::clone(a); }
751    let new_ops: Vec<Opcode> = program.ops.iter().map(|op| rewrite_op(op, cache)).collect();
752    let out = Arc::new(Program {
753        ops:           new_ops.into(),
754        source:        program.source.clone(),
755        id:            program.id,
756        is_structural: program.is_structural,
757    });
758    cache.insert(sig, Arc::clone(&out));
759    out
760}
761
762fn rewrite_op(op: &Opcode, cache: &mut HashMap<u64, Arc<Program>>) -> Opcode {
763    use super::vm::{CompiledObjEntry, CompiledFSPart, CompSpec, DictCompSpec};
764    match op {
765        Opcode::AndOp(p)        => Opcode::AndOp(dedup_rec(p, cache)),
766        Opcode::OrOp(p)         => Opcode::OrOp(dedup_rec(p, cache)),
767        Opcode::CoalesceOp(p)   => Opcode::CoalesceOp(dedup_rec(p, cache)),
768        Opcode::InlineFilter(p) => Opcode::InlineFilter(dedup_rec(p, cache)),
769        Opcode::FilterCount(p)  => Opcode::FilterCount(dedup_rec(p, cache)),
770        Opcode::MapSum(p)       => Opcode::MapSum(dedup_rec(p, cache)),
771        Opcode::MapAvg(p)       => Opcode::MapAvg(dedup_rec(p, cache)),
772        Opcode::MapFlatten(p)   => Opcode::MapFlatten(dedup_rec(p, cache)),
773        Opcode::FilterTakeWhile { pred, stop } => Opcode::FilterTakeWhile {
774            pred: dedup_rec(pred, cache),
775            stop: dedup_rec(stop, cache),
776        },
777        Opcode::FindFirst(p)    => Opcode::FindFirst(dedup_rec(p, cache)),
778        Opcode::FindOne(p)      => Opcode::FindOne(dedup_rec(p, cache)),
779        Opcode::DynIndex(p)     => Opcode::DynIndex(dedup_rec(p, cache)),
780        Opcode::FilterMap { pred, map } => Opcode::FilterMap {
781            pred: dedup_rec(pred, cache),
782            map:  dedup_rec(map,  cache),
783        },
784        Opcode::FilterFilter { p1, p2 } => Opcode::FilterFilter {
785            p1: dedup_rec(p1, cache),
786            p2: dedup_rec(p2, cache),
787        },
788        Opcode::MapMap { f1, f2 } => Opcode::MapMap {
789            f1: dedup_rec(f1, cache),
790            f2: dedup_rec(f2, cache),
791        },
792        Opcode::LetExpr { name, body } => Opcode::LetExpr {
793            name: name.clone(),
794            body: dedup_rec(body, cache),
795        },
796        Opcode::CallMethod(c) => Opcode::CallMethod(rewrite_call(c, cache)),
797        Opcode::CallOptMethod(c) => Opcode::CallOptMethod(rewrite_call(c, cache)),
798        Opcode::MakeArr(progs) => {
799            let new_progs: Vec<Arc<Program>> = progs.iter().map(|p| dedup_rec(p, cache)).collect();
800            Opcode::MakeArr(new_progs.into())
801        }
802        Opcode::MakeObj(entries) => {
803            let new_entries: Vec<CompiledObjEntry> = entries.iter().map(|e| match e {
804                CompiledObjEntry::Short(s) => CompiledObjEntry::Short(s.clone()),
805                CompiledObjEntry::Kv { key, prog, optional, cond } => CompiledObjEntry::Kv {
806                    key: key.clone(),
807                    prog: dedup_rec(prog, cache),
808                    optional: *optional,
809                    cond: cond.as_ref().map(|c| dedup_rec(c, cache)),
810                },
811                CompiledObjEntry::Dynamic { key, val } => CompiledObjEntry::Dynamic {
812                    key: dedup_rec(key, cache),
813                    val: dedup_rec(val, cache),
814                },
815                CompiledObjEntry::Spread(p) => CompiledObjEntry::Spread(dedup_rec(p, cache)),
816                CompiledObjEntry::SpreadDeep(p) => CompiledObjEntry::SpreadDeep(dedup_rec(p, cache)),
817            }).collect();
818            Opcode::MakeObj(new_entries.into())
819        }
820        Opcode::FString(parts) => {
821            let new_parts: Vec<CompiledFSPart> = parts.iter().map(|p| match p {
822                CompiledFSPart::Lit(s) => CompiledFSPart::Lit(s.clone()),
823                CompiledFSPart::Interp { prog, fmt } => CompiledFSPart::Interp {
824                    prog: dedup_rec(prog, cache),
825                    fmt:  fmt.clone(),
826                },
827            }).collect();
828            Opcode::FString(new_parts.into())
829        }
830        Opcode::ListComp(spec) => {
831            let new_spec = CompSpec {
832                expr:  dedup_rec(&spec.expr, cache),
833                iter:  dedup_rec(&spec.iter, cache),
834                cond:  spec.cond.as_ref().map(|c| dedup_rec(c, cache)),
835                vars:  spec.vars.clone(),
836            };
837            Opcode::ListComp(Arc::new(new_spec))
838        }
839        Opcode::SetComp(spec) => {
840            let new_spec = CompSpec {
841                expr:  dedup_rec(&spec.expr, cache),
842                iter:  dedup_rec(&spec.iter, cache),
843                cond:  spec.cond.as_ref().map(|c| dedup_rec(c, cache)),
844                vars:  spec.vars.clone(),
845            };
846            Opcode::SetComp(Arc::new(new_spec))
847        }
848        Opcode::DictComp(spec) => {
849            let new_spec = DictCompSpec {
850                key:   dedup_rec(&spec.key, cache),
851                val:   dedup_rec(&spec.val, cache),
852                iter:  dedup_rec(&spec.iter, cache),
853                cond:  spec.cond.as_ref().map(|c| dedup_rec(c, cache)),
854                vars:  spec.vars.clone(),
855            };
856            Opcode::DictComp(Arc::new(new_spec))
857        }
858        _ => op.clone(),
859    }
860}
861
862fn rewrite_call(c: &Arc<super::vm::CompiledCall>, cache: &mut HashMap<u64, Arc<Program>>) -> Arc<super::vm::CompiledCall> {
863    use super::vm::CompiledCall;
864    let new_subs: Vec<Arc<Program>> = c.sub_progs.iter().map(|p| dedup_rec(p, cache)).collect();
865    Arc::new(CompiledCall {
866        method:    c.method,
867        name:      c.name.clone(),
868        sub_progs: new_subs.into(),
869        orig_args: c.orig_args.clone(),
870    })
871}
872
873// ── Cost model ────────────────────────────────────────────────────────────────
874
875/// Rough, ordinal cost estimate per opcode.  Not a wall-clock number — only
876/// useful to compare relative cost (e.g. for AndOp operand reordering).
877pub fn opcode_cost(op: &Opcode) -> u32 {
878    match op {
879        Opcode::PushNull | Opcode::PushBool(_) | Opcode::PushInt(_)
880            | Opcode::PushFloat(_) | Opcode::PushStr(_)
881            | Opcode::PushRoot | Opcode::PushCurrent | Opcode::LoadIdent(_) => 1,
882        Opcode::GetField(_) | Opcode::OptField(_) | Opcode::GetIndex(_)
883            | Opcode::RootChain(_) | Opcode::GetPointer(_) => 2,
884        Opcode::GetSlice(..) | Opcode::Descendant(_) => 5,
885        Opcode::DescendAll => 20,
886        Opcode::Not | Opcode::Neg | Opcode::SetCurrent
887            | Opcode::BindVar(_) | Opcode::StoreVar(_)
888            | Opcode::BindObjDestructure(_) | Opcode::BindArrDestructure(_) => 1,
889        Opcode::Add | Opcode::Sub | Opcode::Mul | Opcode::Div | Opcode::Mod
890            | Opcode::Eq | Opcode::Neq | Opcode::Lt | Opcode::Lte
891            | Opcode::Gt | Opcode::Gte | Opcode::Fuzzy => 2,
892        Opcode::KindCheck { .. } => 2,
893        Opcode::AndOp(p) | Opcode::OrOp(p) | Opcode::CoalesceOp(p) => 2 + program_cost(p),
894        Opcode::InlineFilter(p) | Opcode::FilterCount(p)
895            | Opcode::FindFirst(p) | Opcode::FindOne(p)
896            | Opcode::MapSum(p) | Opcode::MapAvg(p)
897            | Opcode::MapFlatten(p)
898            | Opcode::DynIndex(p) => 10 + program_cost(p),
899        Opcode::FilterTakeWhile { pred, stop } => 10 + program_cost(pred) + program_cost(stop),
900        Opcode::FilterDropWhile { pred, drop } => 10 + program_cost(pred) + program_cost(drop),
901        Opcode::MapUnique(p) => 15 + program_cost(p),
902        Opcode::EquiJoin { rhs, .. } => 25 + program_cost(rhs),
903        Opcode::FilterMap { pred, map } => 10 + program_cost(pred) + program_cost(map),
904        Opcode::FilterFilter { p1, p2 } => 10 + program_cost(p1) + program_cost(p2),
905        Opcode::MapMap { f1, f2 } => 10 + program_cost(f1) + program_cost(f2),
906        Opcode::TopN { n, .. } => 15 + *n as u32,
907        Opcode::CallMethod(c) | Opcode::CallOptMethod(c) => {
908            let base = match c.method {
909                BuiltinMethod::Filter | BuiltinMethod::Map | BuiltinMethod::FlatMap => 10,
910                BuiltinMethod::Sort   => 30,
911                BuiltinMethod::GroupBy | BuiltinMethod::IndexBy => 25,
912                BuiltinMethod::Len    | BuiltinMethod::Count => 2,
913                _ => 8,
914            };
915            base + c.sub_progs.iter().map(|p| program_cost(p)).sum::<u32>()
916        }
917        Opcode::MakeObj(entries) => {
918            use super::vm::CompiledObjEntry;
919            entries.iter().map(|e| match e {
920                CompiledObjEntry::Short(_) => 2,
921                CompiledObjEntry::Kv { prog, cond, .. } =>
922                    2 + program_cost(prog) + cond.as_ref().map_or(0, |c| program_cost(c)),
923                CompiledObjEntry::Dynamic { key, val } => 3 + program_cost(key) + program_cost(val),
924                CompiledObjEntry::Spread(p) => 5 + program_cost(p),
925                CompiledObjEntry::SpreadDeep(p) => 8 + program_cost(p),
926            }).sum()
927        }
928        Opcode::MakeArr(progs) => progs.iter().map(|p| 1 + program_cost(p)).sum(),
929        Opcode::FString(parts) => {
930            use super::vm::CompiledFSPart;
931            parts.iter().map(|p| match p {
932                CompiledFSPart::Lit(_) => 1,
933                CompiledFSPart::Interp { prog, .. } => 3 + program_cost(prog),
934            }).sum()
935        }
936        Opcode::ListComp(s) | Opcode::SetComp(s) =>
937            15 + program_cost(&s.expr) + program_cost(&s.iter)
938               + s.cond.as_ref().map_or(0, |c| program_cost(c)),
939        Opcode::DictComp(s) =>
940            15 + program_cost(&s.key) + program_cost(&s.val) + program_cost(&s.iter)
941               + s.cond.as_ref().map_or(0, |c| program_cost(c)),
942        Opcode::LetExpr { body, .. } => 2 + program_cost(body),
943        Opcode::Quantifier(_) => 2,
944        Opcode::CastOp(_) => 2,
945        Opcode::PatchEval(_) => 50,
946    }
947}
948
949/// Total cost of a program (sum of per-op costs).
950pub fn program_cost(program: &Program) -> u32 {
951    program.ops.iter().map(opcode_cost).sum()
952}
953
954// ── Monotonicity lattice ─────────────────────────────────────────────────────
955
956/// Tracks ordering properties of array-like values through the pipeline.
957#[derive(Debug, Clone, Copy, PartialEq, Eq)]
958pub enum Monotonicity {
959    /// Order unknown.
960    Unknown,
961    /// Ascending by natural comparator.
962    Asc,
963    /// Descending by natural comparator.
964    Desc,
965    /// Not an ordered collection.
966    NotArray,
967}
968
969impl Monotonicity {
970    pub fn after(self, op: &Opcode) -> Monotonicity {
971        match op {
972            Opcode::CallMethod(c) if c.sub_progs.is_empty() => match c.method {
973                BuiltinMethod::Sort    => Monotonicity::Asc,
974                BuiltinMethod::Reverse => match self {
975                    Monotonicity::Asc  => Monotonicity::Desc,
976                    Monotonicity::Desc => Monotonicity::Asc,
977                    x => x,
978                },
979                BuiltinMethod::Filter  => self, // order preserved
980                BuiltinMethod::Map     => Monotonicity::Unknown, // key changes
981                _ => Monotonicity::Unknown,
982            }
983            Opcode::TopN { asc, .. } => if *asc { Monotonicity::Asc } else { Monotonicity::Desc },
984            Opcode::MakeArr(_) | Opcode::ListComp(_) => Monotonicity::Unknown,
985            _ => self,
986        }
987    }
988}
989
990/// Walk program and determine monotonicity of the final result.
991pub fn infer_monotonicity(program: &Program) -> Monotonicity {
992    let mut m = Monotonicity::Unknown;
993    for op in program.ops.iter() { m = m.after(op); }
994    m
995}
996
997// ── Escape analysis ───────────────────────────────────────────────────────────
998
999/// Simple escape check: does the program's final value contain references to
1000/// the input document (i.e. survive returning)?  If not, the compiler may
1001/// emit value-copying ops rather than Arc-sharing to free the original doc.
1002///
1003/// Returns `false` only when the result is a scalar or newly-constructed
1004/// object/array with no root references.
1005pub fn escapes_doc(program: &Program) -> bool {
1006    for op in program.ops.iter() {
1007        match op {
1008            Opcode::PushRoot | Opcode::PushCurrent | Opcode::RootChain(_)
1009                | Opcode::GetField(_) | Opcode::GetIndex(_) | Opcode::GetSlice(..)
1010                | Opcode::Descendant(_) | Opcode::DescendAll
1011                | Opcode::GetPointer(_) | Opcode::OptField(_) => return true,
1012            Opcode::CallMethod(c) | Opcode::CallOptMethod(c)
1013                if c.sub_progs.iter().any(|p| escapes_doc(p)) => return true,
1014            _ => {}
1015        }
1016    }
1017    false
1018}
1019
1020// ── Selectivity scoring ──────────────────────────────────────────────────────
1021
1022/// Rough selectivity estimate for AST predicates.  Lower score → more
1023/// selective (filters out more rows).  Used to reorder `and` operands so
1024/// cheaper / more-selective predicate runs first (short-circuit friendly).
1025pub fn selectivity_score(expr: &super::ast::Expr) -> u32 {
1026    use super::ast::{Expr, BinOp};
1027    match expr {
1028        Expr::Bool(true) => 1000,        // no filtering
1029        Expr::Bool(false) => 0,          // max filtering
1030        Expr::BinOp(_, BinOp::Eq, _)    => 1,
1031        Expr::BinOp(_, BinOp::Neq, _)   => 5,
1032        Expr::BinOp(_, BinOp::Lt, _) | Expr::BinOp(_, BinOp::Gt, _)
1033            | Expr::BinOp(_, BinOp::Lte, _) | Expr::BinOp(_, BinOp::Gte, _) => 3,
1034        Expr::BinOp(_, BinOp::Fuzzy, _) => 2,
1035        Expr::BinOp(l, BinOp::And, r) =>
1036            selectivity_score(l).min(selectivity_score(r)),
1037        Expr::BinOp(l, BinOp::Or, r)  =>
1038            selectivity_score(l) + selectivity_score(r),
1039        Expr::Not(e) => 10u32.saturating_sub(selectivity_score(e)),
1040        Expr::Kind { .. } => 2,
1041        _ => 5,
1042    }
1043}
1044
1045// ── Kind-check specialisation ────────────────────────────────────────────────
1046
1047/// When a `KindCheck` is applied to a value with a statically-known type,
1048/// the check can be constant-folded to true/false at compile time.
1049pub fn fold_kind_check(val_ty: VType, target: KindType, negate: bool) -> Option<bool> {
1050    let matches = match (val_ty, target) {
1051        (VType::Null, KindType::Null)   => true,
1052        (VType::Bool, KindType::Bool)   => true,
1053        (VType::Int | VType::Float | VType::Num, KindType::Number) => true,
1054        (VType::Str,  KindType::Str)    => true,
1055        (VType::Arr,  KindType::Array)  => true,
1056        (VType::Obj,  KindType::Object) => true,
1057        (VType::Unknown, _)             => return None,
1058        _                               => false,
1059    };
1060    Some(if negate { !matches } else { matches })
1061}