Skip to main content

powdb_query/
executor.rs

1use crate::ast::*;
2use crate::canonicalize::canonicalize;
3use crate::plan::*;
4use crate::plan_cache::PlanCache;
5use crate::planner;
6use crate::result::QueryResult;
7use powdb_storage::catalog::Catalog;
8use powdb_storage::row::{decode_column, decode_row, patch_var_column_in_place, RowLayout};
9use powdb_storage::types::*;
10use powdb_storage::view::{ViewDef, ViewRegistry};
11use std::cmp::Reverse;
12use std::collections::BinaryHeap;
13use std::io;
14use std::path::Path;
15use std::sync::Mutex;
16use std::time::Instant;
17use tracing::{error, info, Level};
18
/// Sentinel error returned by `Engine::execute_powql_readonly` when the
/// query cannot be served from the shared-borrow read path. That happens
/// in two situations:
///
///   * the parsed statement is not read-only (see
///     [`is_read_only_statement`]), or
///   * the query touches a materialized view whose backing table is dirty.
///     The read path holds only `&self`, so it can't refresh the view.
///
/// In both cases the caller is expected to recognise this exact string
/// and retry with the write lock.
///
/// Mission infra-1: this is the escalation hook between the RwLock reader
/// fast path and the generic write path. Handlers match on it verbatim.
pub const READONLY_NEEDS_WRITE: &str = "__POWDB_READONLY_NEEDS_WRITE__";
27
/// Plan cache capacity, passed to `PlanCache::new` when the engine is
/// constructed. Bench workloads fill ~15 slots; real apps will sit
/// comfortably in 256. Lookup is O(1), collisions clear the cache (see
/// `plan_cache::PlanCache::insert`).
const PLAN_CACHE_CAPACITY: usize = 256;
32
33// ─── Mission D11 Phase 1: scalar hot-loop helpers ─────────────────────────
34//
35// These macros expand into the scan body of `agg_single_col_fast` and sit
36// inside the `for_each_row_raw` closure. They exist to:
37//
38//   1. Split the loop on presence of a predicate *outside* the hot body,
39//      so the no-predicate path (agg_sum/agg_min/agg_max bench workloads)
40//      never pays the `Option<CompiledPredicate>` branch per row.
41//   2. Drop two bounds checks per row by reading the null bitmap byte
42//      and the 8-byte value via raw pointer casts.
43//
44// SAFETY (shared across every call site below):
45//
46//   - `$bmp_byte` is `col_idx / 8` where `col_idx < n_cols`, and the row
47//     encoding stores `bitmap_size = n_cols.div_ceil(8)` bytes of bitmap
48//     starting at offset 2. So `2 + $bmp_byte < 2 + bitmap_size ≤ row_len`
49//     and `get_unchecked(2 + $bmp_byte)` is inside the row slice.
50//   - `$off = 2 + bitmap_size + fixed_offsets[col_idx]` for a fixed-size
51//     column. Every fixed-size column contributes `fixed_size(type_id)`
52//     bytes to the fixed region, so the row always has `[$off .. $off+8]`
53//     available for any i64/f64 column — enforced by the row encoder
54//     (`storage/src/row.rs`) and the schema invariant that a row with a
55//     given schema has `row_len ≥ 2 + bitmap_size + fixed_region_size`.
56//   - Both macros are only invoked from `agg_single_col_fast`, which
57//     early-returns if the column isn't Int/Float (8-byte fixed) and
58//     early-returns if `fast.fixed_offsets[col_idx]` is `None`.
// Shared implementation behind `agg_int_loop!` and `agg_float_loop!`.
//
// Scans `$table` through `$self.catalog.for_each_row_raw`. For every row
// that passes the optional predicate and whose null bit for the target
// column is clear, it decodes the 8 little-endian bytes at `$off` as `$ty`,
// binds them to `$v`, and runs `$body`.
//
// Arguments:
//   - `$ty`       : `i64` or `f64` (any type with `from_le_bytes([u8; 8])`).
//   - `$self`     : value with a `catalog` field exposing `for_each_row_raw`.
//   - `$table`    : table argument forwarded to `for_each_row_raw`.
//   - `$pred`     : `Option` of a callable `(&[u8]) -> bool`; `None` selects
//                   the predicate-free loop so no per-row branch is paid.
//   - `$bmp_byte` : index of the null-bitmap byte (relative to offset 2).
//   - `$bmp_bit`  : bit position of the column inside that byte.
//   - `$off`      : absolute byte offset of the 8-byte value in the row.
//
// The expansion uses `?` on the scan result (mapped to `String`), so it can
// only be used inside a function returning `Result<_, String>`.
//
// The three index arguments are evaluated exactly once, before the scan.
macro_rules! agg_fixed8_loop {
    (
        $ty:ty, $self:expr, $table:expr, $pred:expr,
        $bmp_byte:expr, $bmp_bit:expr, $off:expr,
        |$v:ident| $body:block
    ) => {{
        let bmp_byte = $bmp_byte;
        let bmp_bit = $bmp_bit;
        let off = $off;
        // The predicate check is split *outside* the row closure so the
        // no-predicate path carries no `Option` branch per row.
        if let Some(pred) = &$pred {
            $self
                .catalog
                .for_each_row_raw($table, |_rid, data| {
                    if !pred(data) {
                        return;
                    }
                    // SAFETY: `2 + bmp_byte` is inside the row's null bitmap;
                    // see the SAFETY comment above `agg_int_loop!`.
                    let bmp = unsafe { *data.get_unchecked(2 + bmp_byte) };
                    if (bmp >> bmp_bit) & 1 == 1 {
                        // Null bit set: the column holds no value.
                        return;
                    }
                    // SAFETY: `[off .. off + 8]` lies inside the row's fixed
                    // region (see the SAFETY comment above `agg_int_loop!`).
                    // `[u8; 8]` has alignment 1, so the read is aligned.
                    let $v: $ty = unsafe {
                        <$ty>::from_le_bytes(*(data.as_ptr().add(off) as *const [u8; 8]))
                    };
                    $body
                })
                .map_err(|e| e.to_string())?;
        } else {
            $self
                .catalog
                .for_each_row_raw($table, |_rid, data| {
                    // SAFETY: `2 + bmp_byte` is inside the row's null bitmap;
                    // see the SAFETY comment above `agg_int_loop!`.
                    let bmp = unsafe { *data.get_unchecked(2 + bmp_byte) };
                    if (bmp >> bmp_bit) & 1 == 1 {
                        // Null bit set: the column holds no value.
                        return;
                    }
                    // SAFETY: `[off .. off + 8]` lies inside the row's fixed
                    // region (see the SAFETY comment above `agg_int_loop!`).
                    // `[u8; 8]` has alignment 1, so the read is aligned.
                    let $v: $ty = unsafe {
                        <$ty>::from_le_bytes(*(data.as_ptr().add(off) as *const [u8; 8]))
                    };
                    $body
                })
                .map_err(|e| e.to_string())?;
        }
    }};
}

// Scalar scan loop over a non-null `i64` column. Invocation syntax is
// `agg_int_loop!(self, table, pred, bmp_byte, bmp_bit, off, |v: i64| { .. })`;
// the work is done by `agg_fixed8_loop!`.
macro_rules! agg_int_loop {
    (
        $self:expr, $table:expr, $pred:expr,
        $bmp_byte:expr, $bmp_bit:expr, $off:expr,
        |$v:ident : i64| $body:block
    ) => {
        agg_fixed8_loop!(
            i64, $self, $table, $pred, $bmp_byte, $bmp_bit, $off, |$v| $body
        )
    };
}

// Scalar scan loop over a non-null `f64` column. Invocation syntax is
// `agg_float_loop!(self, table, pred, bmp_byte, bmp_bit, off, |v: f64| { .. })`;
// the work is done by `agg_fixed8_loop!`.
macro_rules! agg_float_loop {
    (
        $self:expr, $table:expr, $pred:expr,
        $bmp_byte:expr, $bmp_bit:expr, $off:expr,
        |$v:ident : f64| $body:block
    ) => {
        agg_fixed8_loop!(
            f64, $self, $table, $pred, $bmp_byte, $bmp_bit, $off, |$v| $body
        )
    };
}
146
147/// Mission infra-1: classify a parsed statement as read-only vs. mutating.
148/// Used by [`Engine::execute_powql_readonly`] and by the server handler
149/// to decide between the RwLock reader and writer sides. `Union` recurses
150/// because each side can independently be read/write (though in practice
151/// both sides are reads — the parser only builds Union from query shapes).
152pub fn is_read_only_statement(stmt: &Statement) -> bool {
153    match stmt {
154        Statement::Query(_) => true,
155        Statement::Union(u) => is_read_only_statement(&u.left) && is_read_only_statement(&u.right),
156        Statement::Insert(_)
157        | Statement::Upsert(_)
158        | Statement::UpdateQuery(_)
159        | Statement::DeleteQuery(_)
160        | Statement::CreateType(_)
161        | Statement::AlterTable(_)
162        | Statement::DropTable(_)
163        | Statement::CreateView(_)
164        | Statement::RefreshView(_)
165        | Statement::DropView(_) => false,
166        Statement::Explain(inner) => is_read_only_statement(inner),
167    }
168}
169
/// The PowQL query engine: owns the storage catalog together with the
/// plan cache, the prepared-insert scratch buffer, and the materialized
/// view registry.
pub struct Engine {
    /// Storage catalog. Schema lookups, row scans, and WAL syncs issued by
    /// the engine all go through it.
    catalog: Catalog,
    /// Mission D9 — cached parsed+planned query trees keyed by canonical
    /// hash. Saves the ~3μs parse+plan cost on repeat queries that differ
    /// only in literal values.
    ///
    /// Mission infra-1: wrapped in `Mutex` so the read path can be driven
    /// by `&self`. The critical section is extremely short — a single
    /// hashmap lookup + plan clone on a hit, or a single insert on a miss.
    /// A full `RwLock` would be over-engineered here; the contention window
    /// is smaller than the read-path scan work it gates.
    plan_cache: Mutex<PlanCache>,
    /// Mission C Phase 13: reusable `Vec<Value>` scratch buffer for the
    /// prepared-insert fast path. `execute_prepared` used to allocate a
    /// fresh `vec![Value::Empty; n_cols]` on every insert; recycling this
    /// buffer shaves one heap alloc per row on `insert_batch_1k`.
    insert_values_scratch: Vec<Value>,
    /// Materialized view registry: tracks view definitions, dependencies,
    /// and dirty state. Views are backed by regular catalog tables; this
    /// registry adds the lifecycle metadata.
    view_registry: ViewRegistry,
}
192
/// Mission C Phase 5: a pre-parsed, pre-planned query. The caller holds
/// one of these and repeatedly executes it with fresh literal values via
/// [`Engine::execute_prepared`]. This is PowDB's equivalent of SQLite's
/// `prepare_cached` — the parse + plan cost is paid exactly once, and
/// every subsequent execution skips the lexer, the canonicalise hash,
/// and the plan-cache hashmap lookup.
///
/// The template plan still contains the literal values from the original
/// query string. They're overwritten on every call. See `execute_prepared`
/// for the substitution walk order.
///
/// For `PlanNode::Insert` templates whose assignment values are all plain
/// literals (the common case — `insert T { id := 1, name := "a" }`), we
/// additionally resolve the column indices at prepare time and stash them
/// in `insert_fast`. That lets `execute_prepared` skip the
/// plan-clone + substitute walk entirely and build the row directly from
/// the caller's literal slice — the fastest possible insert through the
/// query layer.
#[derive(Clone)]
pub struct PreparedQuery {
    /// The planned query tree kept as a template. It still carries the
    /// literals of the original query text; they are replaced with the
    /// caller-supplied values on each execution.
    plan_template: PlanNode,
    /// Total number of `Expr::Literal` slots reachable from the plan.
    /// Callers must supply exactly this many literals per execution.
    pub param_count: usize,
    /// Fast-path metadata for `PlanNode::Insert`. `Some` when:
    ///   * the template is an Insert, and
    ///   * every assignment RHS is `Expr::Literal(_)` (no computed exprs),
    ///     which means param_count == assignments.len() and the caller's
    ///     literal slice maps 1:1 to schema column indices.
    ///
    /// Mission C Phase 15: upgraded from a bare `Vec<usize>` to a
    /// dedicated [`InsertFast`] struct so the execute path can skip the
    /// second `catalog.schema(table)` HashMap lookup just to read
    /// `n_cols`, and can dispatch through `get_table_mut` + `tbl.insert`
    /// instead of going via the generic `catalog.insert` wrapper.
    insert_fast: Option<InsertFast>,
    /// Mission C Phase 14: fast-path metadata for point updates by primary
    /// key — `T filter .pk = <lit> update { col := <lit> }` where `pk` is
    /// an indexed column and `col` is fixed-size and not indexed. At
    /// execute time we skip plan clone, substitute walk, schema re-lookup,
    /// `resolved_assignments` + `FastPatch` + `matching_rids` Vec allocs,
    /// and the whole `PlanNode::Update` arm. Just a btree lookup and a
    /// byte patch.
    update_pk_fast: Option<UpdatePkFast>,
}
238
/// Mission C Phase 15: precomputed insert fast-path metadata. Built once
/// in [`Engine::prepare`] from a `PlanNode::Insert` template whose every
/// assignment RHS is a raw literal. The execute path reads `n_cols` and
/// `col_indices` directly — no catalog schema lookup needed.
#[derive(Clone)]
struct InsertFast {
    /// Mission C Phase 18: cached slot index into `Catalog::tables`.
    /// Resolved once at `prepare` time and assumed stable for the lifetime
    /// of the catalog. Lets the hot path dispatch through
    /// `catalog.table_by_slot_mut(slot)` — a pure Vec index, no hash, no
    /// bucket walk, no string compare.
    ///
    /// NOTE(review): this comment originally justified the stability claim
    /// with "PowDB has no DROP TABLE", but the statement grammar now has a
    /// `Statement::DropTable` variant. Confirm that a cached slot cannot
    /// outlive (or be reassigned after) a dropped table.
    table_slot: usize,
    /// Schema column index for each positional literal, in the order the
    /// caller passes them.
    col_indices: Vec<usize>,
    /// Total number of schema columns — the size `insert_values_scratch`
    /// must be resized to before filling positions via `col_indices`.
    /// Cached here so the hot loop skips `catalog.schema(table)` entirely.
    n_cols: usize,
}
259
/// Mission C Phase 14: precomputed fast-path for `update_by_pk` shaped
/// prepared queries. Built once in [`Engine::prepare`] and reused on every
/// `execute_prepared` call.
#[derive(Clone)]
struct UpdatePkFast {
    /// Mission C Phase 18: cached slot index into `Catalog::tables`.
    /// Resolved once at `prepare` time and assumed stable for the lifetime
    /// of the catalog. At a 52ns total budget the swap from FxHashMap
    /// probe to a Vec index is measurable.
    ///
    /// NOTE(review): the statement grammar includes `Statement::DropTable`;
    /// confirm slot indices stay valid across table drops.
    table_slot: usize,
    /// Name of the key column (the `.id = ?` side). We look this up in
    /// the owning table's `indexed_cols` at execute time rather than
    /// caching a raw `&BTree` — the engine owns the catalog and can't
    /// hand out long-lived borrows anyway, and the n≤5 linear scan is
    /// a handful of ns.
    key_col: String,
    /// Byte offset of the target fixed column in the row encoding:
    /// `2 + bitmap_size + layout.fixed_offsets[target_col]`.
    field_off: usize,
    /// Byte offset of the bitmap byte containing the target column's null
    /// bit (`2 + target_col / 8`).
    bitmap_byte_off: usize,
    /// Bit mask for the target column's null bit.
    bit_mask: u8,
    /// Type of the target fixed column — drives the literal-to-bytes
    /// encoding at execute time.
    target_type: TypeId,
    /// Index into the caller's `literals` slice that holds the filter key.
    /// Always 0 today (filter literal is visited before the assignment
    /// RHS), but stored explicitly so the contract is obvious.
    key_literal_idx: usize,
    /// Index into the caller's `literals` slice that holds the new value.
    value_literal_idx: usize,
}
294
295impl Engine {
296    pub fn new(data_dir: &Path) -> io::Result<Self> {
297        std::fs::create_dir_all(data_dir)?;
298        // Try to reopen an existing database first; only create a fresh
299        // catalog when there isn't one already on disk.
300        let catalog = match Catalog::open(data_dir) {
301            Ok(c) => {
302                info!(data_dir = %data_dir.display(), "engine reopened existing database");
303                c
304            }
305            Err(e) if e.kind() == io::ErrorKind::NotFound => {
306                info!(data_dir = %data_dir.display(), "engine initialized fresh database");
307                Catalog::create(data_dir)?
308            }
309            Err(e) => return Err(e),
310        };
311        let view_registry =
312            ViewRegistry::open(data_dir).unwrap_or_else(|_| ViewRegistry::new(data_dir));
313        Ok(Engine {
314            catalog,
315            plan_cache: Mutex::new(PlanCache::new(PLAN_CACHE_CAPACITY)),
316            insert_values_scratch: Vec::new(),
317            view_registry,
318        })
319    }
320
321    /// Parse + plan + execute a PowQL query.
322    ///
323    /// Mission D6 — tracing collapse: the previous implementation ran 4
324    /// `Instant::now()` + 3 `elapsed().as_micros()` calls + formatted an
325    /// `info!` span on every query, even when tracing was disabled. On a
326    /// sub-microsecond `point_lookup_indexed` call that overhead was
327    /// 100-200ns — 20%+ of the whole query. We now measure time only when
328    /// INFO is actually enabled via `tracing::enabled!`, and we moved the
329    /// noisy `debug!(?plan)` line behind the same gate so the Debug
330    /// formatter can't run unconditionally either.
331    ///
332    /// Mission D9 — plan cache: on the hot path we canonicalise the query
333    /// text (lex + FNV-1a hash with literal values stripped), check the
334    /// cache, and on a hit substitute the new literals into a clone of the
335    /// cached plan. This skips re-lexing, re-parsing, and re-planning —
336    /// around 3μs per call on bench workloads. On a miss we plan as before
337    /// and insert the plan under its canonical hash.
338    pub fn execute_powql(&mut self, input: &str) -> Result<QueryResult, String> {
339        // Hot path: tracing disabled. Zero syscalls, zero formatting.
340        if !tracing::enabled!(Level::INFO) {
341            // D9: try the plan cache first. Canonicalisation lexes the
342            // query once; on a hit we skip the parser and planner entirely.
343            if let Ok((hash, literals)) = canonicalize(input) {
344                let cached = self
345                    .plan_cache
346                    .lock()
347                    .map_err(|e| format!("plan cache lock poisoned: {e}"))?
348                    .get_with_substitution(hash, &literals);
349                if let Some(plan) = cached {
350                    let plan = lower_unindexed_range_scans(&self.catalog, &plan);
351                    let result = self.execute_plan(&plan);
352                    // Mission B (post-review): statement-boundary WAL
353                    // group commit. Catalog::wal_log now only appends;
354                    // the fsync happens here exactly once per statement.
355                    // `sync_wal` is a no-op when nothing was buffered
356                    // (pure reads pay zero fsync).
357                    self.catalog.sync_wal().map_err(|e| e.to_string())?;
358                    return result;
359                }
360                // Miss — plan, insert, execute.
361                return match planner::plan(input) {
362                    Ok(plan) => {
363                        self.plan_cache
364                            .lock()
365                            .map_err(|e| format!("plan cache lock poisoned: {e}"))?
366                            .insert(hash, plan.clone());
367                        let plan = lower_unindexed_range_scans(&self.catalog, &plan);
368                        let result = self.execute_plan(&plan);
369                        self.catalog.sync_wal().map_err(|e| e.to_string())?;
370                        result
371                    }
372                    Err(e) => Err(e.to_string()),
373                };
374            }
375            // Lex error — fall through to the planner so the caller gets a
376            // consistent error shape.
377            return match planner::plan(input) {
378                Ok(plan) => {
379                    let plan = lower_unindexed_range_scans(&self.catalog, &plan);
380                    let result = self.execute_plan(&plan);
381                    self.catalog.sync_wal().map_err(|e| e.to_string())?;
382                    result
383                }
384                Err(e) => Err(e.to_string()),
385            };
386        }
387
388        // Instrumented path — only taken under explicit tracing subscribers.
389        let total_start = Instant::now();
390        let plan_start = Instant::now();
391        let plan = planner::plan(input).map_err(|e| {
392            error!(query = %input, error = %e.to_string(), "query plan failed");
393            e.to_string()
394        })?;
395        let plan_us = plan_start.elapsed().as_micros();
396
397        let exec_start = Instant::now();
398        let plan = lower_unindexed_range_scans(&self.catalog, &plan);
399        let result = self.execute_plan(&plan);
400        // Mission B (post-review): statement-boundary WAL flush.
401        let _ = self.catalog.sync_wal();
402        let exec_us = exec_start.elapsed().as_micros();
403
404        let total_us = total_start.elapsed().as_micros();
405        match &result {
406            Ok(r) => {
407                info!(
408                    query = %input,
409                    plan_us = plan_us,
410                    exec_us = exec_us,
411                    total_us = total_us,
412                    rows = r.row_count(),
413                    "query ok"
414                );
415            }
416            Err(e) => {
417                error!(
418                    query = %input,
419                    plan_us = plan_us,
420                    exec_us = exec_us,
421                    error = %e,
422                    "query failed"
423                );
424            }
425        }
426        result
427    }
428
429    /// Plan cache stats — useful for benches and debugging.
430    pub fn plan_cache_stats(&self) -> (u64, u64, usize) {
431        let cache = self.plan_cache.lock().unwrap();
432        (cache.hits, cache.misses, cache.len())
433    }
434
435    /// Mission infra-1: read-only entry point.
436    ///
437    /// Parses + plans + executes a PowQL query using only a shared borrow
438    /// on the engine. Rejects any statement that would mutate state
439    /// (Insert/Update/Delete/CreateTable/AlterTable/DropTable/CreateView/
440    /// RefreshView/DropView) by returning [`READONLY_NEEDS_WRITE`] so the
441    /// caller can escalate to the write lock.
442    ///
443    /// Also returns [`READONLY_NEEDS_WRITE`] if a materialized view in the
444    /// query is dirty — refreshing one requires `&mut self`, so the caller
445    /// must retake the write lock for the first refresh.
446    ///
447    /// This method is the concurrent-read fast path behind
448    /// `Arc<RwLock<Engine>>`: multiple threads can call it simultaneously
449    /// under a shared `.read()` lock and each will scan independently.
450    pub fn execute_powql_readonly(&self, input: &str) -> Result<QueryResult, String> {
451        // Parse the statement first so we can classify read vs. write
452        // without touching the catalog. This is the same lex+parse cost
453        // the hot path would pay anyway.
454        let stmt = crate::parser::parse(input).map_err(|e| e.to_string())?;
455        if !is_read_only_statement(&stmt) {
456            return Err(READONLY_NEEDS_WRITE.to_string());
457        }
458
459        // Try the plan cache first — identical hash scheme to
460        // `execute_powql` so both paths share cache state. The mutex
461        // section is just a hashmap lookup + plan clone.
462        if let Ok((hash, literals)) = canonicalize(input) {
463            let cached = self
464                .plan_cache
465                .lock()
466                .map_err(|e| format!("plan cache lock poisoned: {e}"))?
467                .get_with_substitution(hash, &literals);
468            if let Some(plan) = cached {
469                let plan = lower_unindexed_range_scans(&self.catalog, &plan);
470                return self.execute_plan_readonly(&plan);
471            }
472            // Miss: plan + insert + execute. The planner is pure, so this
473            // is safe from `&self`.
474            let plan = crate::planner::plan_statement(stmt).map_err(|e| e.to_string())?;
475            self.plan_cache
476                .lock()
477                .map_err(|e| format!("plan cache lock poisoned: {e}"))?
478                .insert(hash, plan.clone());
479            let plan = lower_unindexed_range_scans(&self.catalog, &plan);
480            return self.execute_plan_readonly(&plan);
481        }
482        // Lex error — fall through to the planner for a consistent error
483        // shape (though `parse` above would usually have caught it).
484        let plan = crate::planner::plan_statement(stmt).map_err(|e| e.to_string())?;
485        let plan = lower_unindexed_range_scans(&self.catalog, &plan);
486        self.execute_plan_readonly(&plan)
487    }
488
489    /// Read-only version of [`Engine::execute_plan`]. Dispatches the
490    /// read-path plan variants by calling `&self` helpers and errors with
491    /// [`READONLY_NEEDS_WRITE`] on any write variant. This is the
492    /// recursion target for composite read plans under the RwLock reader.
493    ///
494    /// The dispatch mirrors `execute_plan` for the read branches but does
495    /// not carry any of the fast-paths that need `&mut self` (e.g. plan-
496    /// cache mutation on inner subqueries is handled via the shared mutex
497    /// in [`Engine::execute_powql_readonly`]; in-flight subquery
498    /// materialisation uses [`Engine::materialize_subqueries_readonly`]).
499    fn execute_plan_readonly(&self, plan: &PlanNode) -> Result<QueryResult, String> {
500        match plan {
501            PlanNode::SeqScan { table } => {
502                // Dirty view means we'd need to refresh it — can't do that
503                // under `&self`. Escalate to the write path.
504                if self.view_registry.is_dirty(table) {
505                    return Err(READONLY_NEEDS_WRITE.to_string());
506                }
507                let schema = self
508                    .catalog
509                    .schema(table)
510                    .ok_or_else(|| format!("table '{table}' not found"))?
511                    .clone();
512                let columns: Vec<String> = schema.columns.iter().map(|c| c.name.clone()).collect();
513                let rows: Vec<Vec<Value>> = self
514                    .catalog
515                    .scan(table)
516                    .map_err(|e| e.to_string())?
517                    .map(|(_, row)| row)
518                    .collect();
519                Ok(QueryResult::Rows { columns, rows })
520            }
521
522            PlanNode::AliasScan { table, alias } => {
523                let schema = self
524                    .catalog
525                    .schema(table)
526                    .ok_or_else(|| format!("table '{table}' not found"))?
527                    .clone();
528                let columns: Vec<String> = schema
529                    .columns
530                    .iter()
531                    .map(|c| format!("{alias}.{}", c.name))
532                    .collect();
533                let rows: Vec<Vec<Value>> = self
534                    .catalog
535                    .scan(table)
536                    .map_err(|e| e.to_string())?
537                    .map(|(_, row)| row)
538                    .collect();
539                Ok(QueryResult::Rows { columns, rows })
540            }
541
542            PlanNode::IndexScan { table, column, key } => {
543                let schema = self
544                    .catalog
545                    .schema(table)
546                    .ok_or_else(|| format!("table '{table}' not found"))?
547                    .clone();
548                let columns: Vec<String> = schema.columns.iter().map(|c| c.name.clone()).collect();
549                let key_value = literal_to_value(key)?;
550                let tbl = self
551                    .catalog
552                    .get_table(table)
553                    .ok_or_else(|| format!("table '{table}' not found"))?;
554
555                if let Some(btree) = tbl.index(column) {
556                    let hit = match &key_value {
557                        Value::Int(k) => btree.lookup_int(*k),
558                        other => btree.lookup(other),
559                    };
560                    let rows = match hit {
561                        Some(rid) => match tbl.heap.get(rid) {
562                            Some(data) => vec![decode_row(&tbl.schema, &data)],
563                            None => Vec::new(),
564                        },
565                        None => Vec::new(),
566                    };
567                    return Ok(QueryResult::Rows { columns, rows });
568                }
569
570                // No index: synthetic eq predicate + compiled scan.
571                let fast = FastLayout::new(&schema);
572                let synth_pred = Expr::BinaryOp(
573                    Box::new(Expr::Field(column.clone())),
574                    BinOp::Eq,
575                    Box::new(key.clone()),
576                );
577                if let Some(compiled) = compile_predicate(&synth_pred, &columns, &fast, &schema) {
578                    let mut rows: Vec<Vec<Value>> = Vec::with_capacity(64);
579                    self.catalog
580                        .for_each_row_raw(table, |_rid, data| {
581                            if compiled(data) {
582                                rows.push(decode_row(&schema, data));
583                            }
584                        })
585                        .map_err(|e| e.to_string())?;
586                    return Ok(QueryResult::Rows { columns, rows });
587                }
588
589                // Last resort: slow eq-check.
590                let col_idx = schema
591                    .column_index(column)
592                    .ok_or_else(|| format!("column '{column}' not found"))?;
593                let rows: Vec<Vec<Value>> = tbl
594                    .scan()
595                    .filter_map(|(_, row)| {
596                        if row[col_idx] == key_value {
597                            Some(row)
598                        } else {
599                            None
600                        }
601                    })
602                    .collect();
603                Ok(QueryResult::Rows { columns, rows })
604            }
605
606            PlanNode::RangeScan {
607                table,
608                column,
609                start,
610                end,
611            } => {
612                let tbl = self
613                    .catalog
614                    .get_table(table)
615                    .ok_or_else(|| format!("table '{table}' not found"))?;
616                let columns: Vec<String> =
617                    tbl.schema.columns.iter().map(|c| c.name.clone()).collect();
618                let schema = tbl.schema.clone();
619
620                let start_val = match start {
621                    Some((expr, _)) => Some(literal_to_value(expr)?),
622                    None => None,
623                };
624                let end_val = match end {
625                    Some((expr, _)) => Some(literal_to_value(expr)?),
626                    None => None,
627                };
628                let start_inclusive = start.as_ref().map(|(_, inc)| *inc).unwrap_or(true);
629                let end_inclusive = end.as_ref().map(|(_, inc)| *inc).unwrap_or(true);
630
631                if let Some(btree) = tbl.index(column) {
632                    let hits: Vec<(Value, RowId)> = match (&start_val, &end_val) {
633                        (Some(s), Some(e)) => btree.range(s, e).collect(),
634                        (Some(s), None) => btree.range_from(s),
635                        (None, Some(e)) => btree.range_to(e),
636                        (None, None) => {
637                            // Unbounded both sides — equivalent to seq scan.
638                            let rows: Vec<Vec<Value>> = tbl.scan().map(|(_, row)| row).collect();
639                            return Ok(QueryResult::Rows { columns, rows });
640                        }
641                    };
642                    let mut rows: Vec<Vec<Value>> = Vec::with_capacity(hits.len());
643                    for (key, rid) in hits {
644                        // Filter for exclusive bounds.
645                        if !start_inclusive {
646                            if let Some(ref s) = start_val {
647                                if &key == s {
648                                    continue;
649                                }
650                            }
651                        }
652                        if !end_inclusive {
653                            if let Some(ref e) = end_val {
654                                if &key == e {
655                                    continue;
656                                }
657                            }
658                        }
659                        if let Some(data) = tbl.heap.get(rid) {
660                            rows.push(decode_row(&schema, &data));
661                        }
662                    }
663                    return Ok(QueryResult::Rows { columns, rows });
664                }
665
666                // Fallback: no index — synthesize the range predicate and scan.
667                let fast = FastLayout::new(&schema);
668                let synth = synthesize_range_predicate(column, start, end);
669                if let Some(compiled) = compile_predicate(&synth, &columns, &fast, &schema) {
670                    let mut rows: Vec<Vec<Value>> = Vec::with_capacity(64);
671                    self.catalog
672                        .for_each_row_raw(table, |_rid, data| {
673                            if compiled(data) {
674                                rows.push(decode_row(&schema, data));
675                            }
676                        })
677                        .map_err(|e| e.to_string())?;
678                    return Ok(QueryResult::Rows { columns, rows });
679                }
680
681                // Last resort: decoded row eval.
682                let col_idx = schema
683                    .column_index(column)
684                    .ok_or_else(|| format!("column '{column}' not found"))?;
685                let rows: Vec<Vec<Value>> = tbl
686                    .scan()
687                    .filter(|(_, row)| {
688                        range_matches(
689                            &row[col_idx],
690                            &start_val,
691                            start_inclusive,
692                            &end_val,
693                            end_inclusive,
694                        )
695                    })
696                    .map(|(_, row)| row)
697                    .collect();
698                Ok(QueryResult::Rows { columns, rows })
699            }
700
701            PlanNode::Filter { input, predicate } => {
702                // Materialise subqueries using the `&self` variant.
703                // Uncorrelated subqueries are replaced with InList/Bool;
704                // correlated ones are left as InSubquery/ExistsSubquery
705                // for per-row materialisation below.
706                let materialized;
707                let predicate = if contains_subquery(predicate) {
708                    materialized = self.materialize_subqueries_readonly(predicate)?;
709                    &materialized
710                } else {
711                    predicate
712                };
713
714                // Correlated subquery path: per-row materialisation.
715                if contains_subquery(predicate) {
716                    let result = self.execute_plan_readonly(input)?;
717                    return match result {
718                        QueryResult::Rows { columns, rows } => {
719                            let mut filtered = Vec::new();
720                            for row in rows {
721                                let row_pred = self.materialize_correlated_for_row_readonly(
722                                    predicate, &row, &columns,
723                                )?;
724                                if eval_predicate(&row_pred, &row, &columns) {
725                                    filtered.push(row);
726                                }
727                            }
728                            Ok(QueryResult::Rows {
729                                columns,
730                                rows: filtered,
731                            })
732                        }
733                        _ => Err("filter requires row input".into()),
734                    };
735                }
736
737                // Fused Filter+SeqScan fast path.
738                if let PlanNode::SeqScan { table } = input.as_ref() {
739                    if self.view_registry.is_dirty(table) {
740                        return Err(READONLY_NEEDS_WRITE.to_string());
741                    }
742                    let schema = self
743                        .catalog
744                        .schema(table)
745                        .ok_or_else(|| format!("table '{table}' not found"))?
746                        .clone();
747                    let columns: Vec<String> =
748                        schema.columns.iter().map(|c| c.name.clone()).collect();
749                    let fast = FastLayout::new(&schema);
750                    let row_layout = RowLayout::new(&schema);
751                    let mut rows: Vec<Vec<Value>> = Vec::with_capacity(64);
752
753                    if let Some(compiled) = compile_predicate(predicate, &columns, &fast, &schema) {
754                        self.catalog
755                            .for_each_row_raw(table, |_rid, data| {
756                                if compiled(data) {
757                                    rows.push(decode_row(&schema, data));
758                                }
759                            })
760                            .map_err(|e| e.to_string())?;
761                    } else {
762                        let pred_cols = predicate_column_indices(predicate, &columns);
763                        self.catalog
764                            .for_each_row_raw(table, |_rid, data| {
765                                let pred_row =
766                                    decode_selective(&schema, &row_layout, data, &pred_cols);
767                                if eval_predicate(predicate, &pred_row, &columns) {
768                                    rows.push(decode_row(&schema, data));
769                                }
770                            })
771                            .map_err(|e| e.to_string())?;
772                    }
773
774                    return Ok(QueryResult::Rows { columns, rows });
775                }
776
777                // General path.
778                let result = self.execute_plan_readonly(input)?;
779                match result {
780                    QueryResult::Rows { columns, rows } => {
781                        let filtered: Vec<Vec<Value>> = rows
782                            .into_iter()
783                            .filter(|row| eval_predicate(predicate, row, &columns))
784                            .collect();
785                        Ok(QueryResult::Rows {
786                            columns,
787                            rows: filtered,
788                        })
789                    }
790                    _ => Err("filter requires row input".into()),
791                }
792            }
793
794            PlanNode::Project { input, fields } => {
795                // Fast path: Project over IndexScan. Avoids full-row decode
796                // by calling decode_column only for projected fields.
797                if let PlanNode::IndexScan { table, column, key } = input.as_ref() {
798                    let key_value = literal_to_value(key)?;
799                    let tbl = self
800                        .catalog
801                        .get_table(table)
802                        .ok_or_else(|| format!("table '{table}' not found"))?;
803                    let schema = &tbl.schema;
804                    let layout = tbl.row_layout();
805
806                    let proj_columns: Vec<String> = fields
807                        .iter()
808                        .map(|f| {
809                            f.alias.clone().unwrap_or_else(|| match &f.expr {
810                                Expr::Field(name) => name.clone(),
811                                _ => "?".into(),
812                            })
813                        })
814                        .collect();
815
816                    let proj_indices: Vec<usize> = fields
817                        .iter()
818                        .filter_map(|f| {
819                            if let Expr::Field(name) = &f.expr {
820                                schema.column_index(name)
821                            } else {
822                                None
823                            }
824                        })
825                        .collect();
826
827                    if let Some(btree) = tbl.index(column) {
828                        let lookup_result = match &key_value {
829                            Value::Int(k) => btree.lookup_int(*k),
830                            other => btree.lookup(other),
831                        };
832                        let rows = match lookup_result {
833                            Some(rid) => match tbl.heap.get(rid) {
834                                Some(data) => {
835                                    let row: Vec<Value> = proj_indices
836                                        .iter()
837                                        .map(|&ci| decode_column(schema, layout, &data, ci))
838                                        .collect();
839                                    vec![row]
840                                }
841                                None => Vec::new(),
842                            },
843                            None => Vec::new(),
844                        };
845                        return Ok(QueryResult::Rows {
846                            columns: proj_columns,
847                            rows,
848                        });
849                    }
850                }
851
852                // Fast paths over Limit(Sort(...)) / Limit(Filter(...)) / Limit(SeqScan).
853                if let PlanNode::Limit {
854                    input: inner,
855                    count: limit_expr,
856                } = input.as_ref()
857                {
858                    if let PlanNode::Sort {
859                        input: sort_input,
860                        keys,
861                    } = inner.as_ref()
862                    {
863                        if keys.len() == 1 {
864                            let sort_field = &keys[0].field;
865                            let descending = keys[0].descending;
866                            let limit = match limit_expr {
867                                Expr::Literal(Literal::Int(v)) if *v >= 0 => *v as usize,
868                                _ => usize::MAX,
869                            };
870                            let (table_opt, pred_opt): (Option<&str>, Option<&Expr>) =
871                                match sort_input.as_ref() {
872                                    PlanNode::SeqScan { table } => (Some(table.as_str()), None),
873                                    PlanNode::Filter {
874                                        input: fi,
875                                        predicate,
876                                    } => {
877                                        if let PlanNode::SeqScan { table } = fi.as_ref() {
878                                            (Some(table.as_str()), Some(predicate))
879                                        } else {
880                                            (None, None)
881                                        }
882                                    }
883                                    _ => (None, None),
884                                };
885                            if let Some(table) = table_opt {
886                                if let Some(result) = self.project_filter_sort_limit_fast(
887                                    table, fields, sort_field, descending, limit, pred_opt,
888                                )? {
889                                    return Ok(result);
890                                }
891                            }
892                        }
893                    }
894                    if let PlanNode::Filter {
895                        input: fi,
896                        predicate,
897                    } = inner.as_ref()
898                    {
899                        if let PlanNode::SeqScan { table } = fi.as_ref() {
900                            let limit = match limit_expr {
901                                Expr::Literal(Literal::Int(v)) if *v >= 0 => *v as usize,
902                                _ => usize::MAX,
903                            };
904                            if let Some(result) = self.project_filter_limit_fast(
905                                table,
906                                fields,
907                                limit,
908                                Some(predicate),
909                            )? {
910                                return Ok(result);
911                            }
912                        }
913                    }
914                    if let PlanNode::SeqScan { table } = inner.as_ref() {
915                        let limit = match limit_expr {
916                            Expr::Literal(Literal::Int(v)) if *v >= 0 => *v as usize,
917                            _ => usize::MAX,
918                        };
919                        if let Some(result) =
920                            self.project_filter_limit_fast(table, fields, limit, None)?
921                        {
922                            return Ok(result);
923                        }
924                    }
925                }
926
927                // Project(Filter(SeqScan)) without Limit.
928                if let PlanNode::Filter {
929                    input: fi,
930                    predicate,
931                } = input.as_ref()
932                {
933                    if let PlanNode::SeqScan { table } = fi.as_ref() {
934                        if let Some(result) = self.project_filter_limit_fast(
935                            table,
936                            fields,
937                            usize::MAX,
938                            Some(predicate),
939                        )? {
940                            return Ok(result);
941                        }
942                    }
943                }
944
945                // Project(SeqScan) without Filter or Limit.
946                if let PlanNode::SeqScan { table } = input.as_ref() {
947                    if let Some(result) =
948                        self.project_filter_limit_fast(table, fields, usize::MAX, None)?
949                    {
950                        return Ok(result);
951                    }
952                }
953
954                // Generic path.
955                let result = self.execute_plan_readonly(input)?;
956                match result {
957                    QueryResult::Rows { columns, rows } => {
958                        let proj_columns: Vec<String> = fields
959                            .iter()
960                            .map(|f| {
961                                f.alias.clone().unwrap_or_else(|| match &f.expr {
962                                    Expr::Field(name) => name.clone(),
963                                    Expr::QualifiedField { qualifier, field } => {
964                                        format!("{qualifier}.{field}")
965                                    }
966                                    _ => "?".into(),
967                                })
968                            })
969                            .collect();
970                        let proj_rows: Vec<Vec<Value>> = rows
971                            .iter()
972                            .map(|row| {
973                                fields
974                                    .iter()
975                                    .map(|f| eval_expr(&f.expr, row, &columns))
976                                    .collect()
977                            })
978                            .collect();
979                        Ok(QueryResult::Rows {
980                            columns: proj_columns,
981                            rows: proj_rows,
982                        })
983                    }
984                    _ => Err("project requires row input".into()),
985                }
986            }
987
988            PlanNode::Sort { input, keys } => {
989                let result = self.execute_plan_readonly(input)?;
990                match result {
991                    QueryResult::Rows { columns, mut rows } => {
992                        let key_indices: Vec<(usize, bool)> = keys
993                            .iter()
994                            .map(|k| {
995                                columns
996                                    .iter()
997                                    .position(|c| c == &k.field)
998                                    .map(|idx| (idx, k.descending))
999                                    .ok_or_else(|| format!("column '{}' not found", k.field))
1000                            })
1001                            .collect::<Result<_, String>>()?;
1002                        rows.sort_by(|a, b| {
1003                            for &(col_idx, descending) in &key_indices {
1004                                let cmp = a[col_idx].cmp(&b[col_idx]);
1005                                let cmp = if descending { cmp.reverse() } else { cmp };
1006                                if cmp != std::cmp::Ordering::Equal {
1007                                    return cmp;
1008                                }
1009                            }
1010                            std::cmp::Ordering::Equal
1011                        });
1012                        Ok(QueryResult::Rows { columns, rows })
1013                    }
1014                    _ => Err("sort requires row input".into()),
1015                }
1016            }
1017
1018            PlanNode::Limit { input, count } => {
1019                let result = self.execute_plan_readonly(input)?;
1020                let n = match count {
1021                    Expr::Literal(Literal::Int(v)) => *v as usize,
1022                    _ => return Err("limit must be integer literal".into()),
1023                };
1024                match result {
1025                    QueryResult::Rows { columns, rows } => Ok(QueryResult::Rows {
1026                        columns,
1027                        rows: rows.into_iter().take(n).collect(),
1028                    }),
1029                    _ => Err("limit requires row input".into()),
1030                }
1031            }
1032
1033            PlanNode::Offset { input, count } => {
1034                let result = self.execute_plan_readonly(input)?;
1035                let n = match count {
1036                    Expr::Literal(Literal::Int(v)) => *v as usize,
1037                    _ => return Err("offset must be integer literal".into()),
1038                };
1039                match result {
1040                    QueryResult::Rows { columns, rows } => Ok(QueryResult::Rows {
1041                        columns,
1042                        rows: rows.into_iter().skip(n).collect(),
1043                    }),
1044                    _ => Err("offset requires row input".into()),
1045                }
1046            }
1047
1048            PlanNode::Aggregate {
1049                input,
1050                function,
1051                field,
1052            } => {
1053                // Fast path: count() over SeqScan.
1054                if *function == AggFunc::Count {
1055                    if let PlanNode::SeqScan { table } = input.as_ref() {
1056                        let mut count: i64 = 0;
1057                        self.catalog
1058                            .for_each_row_raw(table, |_rid, _data| {
1059                                count += 1;
1060                            })
1061                            .map_err(|e| e.to_string())?;
1062                        return Ok(QueryResult::Scalar(Value::Int(count)));
1063                    }
1064                    if let PlanNode::Filter {
1065                        input: inner,
1066                        predicate,
1067                    } = input.as_ref()
1068                    {
1069                        if let PlanNode::SeqScan { table } = inner.as_ref() {
1070                            let schema = self
1071                                .catalog
1072                                .schema(table)
1073                                .ok_or_else(|| format!("table '{table}' not found"))?
1074                                .clone();
1075                            let columns: Vec<String> =
1076                                schema.columns.iter().map(|c| c.name.clone()).collect();
1077                            let fast = FastLayout::new(&schema);
1078                            let row_layout = RowLayout::new(&schema);
1079
1080                            if let Some(compiled) =
1081                                compile_predicate(predicate, &columns, &fast, &schema)
1082                            {
1083                                let mut count: i64 = 0;
1084                                self.catalog
1085                                    .for_each_row_raw(table, |_rid, data| {
1086                                        if compiled(data) {
1087                                            count += 1;
1088                                        }
1089                                    })
1090                                    .map_err(|e| e.to_string())?;
1091                                return Ok(QueryResult::Scalar(Value::Int(count)));
1092                            }
1093
1094                            let pred_cols = predicate_column_indices(predicate, &columns);
1095                            let mut count: i64 = 0;
1096                            self.catalog
1097                                .for_each_row_raw(table, |_rid, data| {
1098                                    let pred_row =
1099                                        decode_selective(&schema, &row_layout, data, &pred_cols);
1100                                    if eval_predicate(predicate, &pred_row, &columns) {
1101                                        count += 1;
1102                                    }
1103                                })
1104                                .map_err(|e| e.to_string())?;
1105                            return Ok(QueryResult::Scalar(Value::Int(count)));
1106                        }
1107                    }
1108                }
1109
1110                // Fast path: sum/avg/min/max over single fixed-size numeric.
1111                if matches!(
1112                    function,
1113                    AggFunc::Sum
1114                        | AggFunc::Avg
1115                        | AggFunc::Min
1116                        | AggFunc::Max
1117                        | AggFunc::CountDistinct
1118                ) {
1119                    if let Some(col) = field.as_ref() {
1120                        let (table_opt, pred_opt): (Option<&str>, Option<&Expr>) =
1121                            match input.as_ref() {
1122                                PlanNode::SeqScan { table } => (Some(table.as_str()), None),
1123                                PlanNode::Filter {
1124                                    input: inner,
1125                                    predicate,
1126                                } => {
1127                                    if let PlanNode::SeqScan { table } = inner.as_ref() {
1128                                        (Some(table.as_str()), Some(predicate))
1129                                    } else {
1130                                        (None, None)
1131                                    }
1132                                }
1133                                _ => (None, None),
1134                            };
1135                        if let Some(table) = table_opt {
1136                            if let Some(result) =
1137                                self.agg_single_col_fast(table, col, *function, pred_opt)?
1138                            {
1139                                return Ok(result);
1140                            }
1141                        }
1142                    }
1143                }
1144
1145                // Generic path.
1146                let result = self.execute_plan_readonly(input)?;
1147                match result {
1148                    QueryResult::Rows { columns, rows } => match function {
1149                        AggFunc::Count => Ok(QueryResult::Scalar(Value::Int(rows.len() as i64))),
1150                        AggFunc::CountDistinct => {
1151                            let col = field.as_ref().ok_or("count distinct requires field")?;
1152                            let idx = columns
1153                                .iter()
1154                                .position(|c| c == col)
1155                                .ok_or("col not found")?;
1156                            let mut seen = std::collections::HashSet::new();
1157                            for row in &rows {
1158                                let v = &row[idx];
1159                                if !v.is_empty() {
1160                                    seen.insert(v.clone());
1161                                }
1162                            }
1163                            Ok(QueryResult::Scalar(Value::Int(seen.len() as i64)))
1164                        }
1165                        AggFunc::Avg => {
1166                            let col = field.as_ref().ok_or("avg requires field")?;
1167                            let idx = columns
1168                                .iter()
1169                                .position(|c| c == col)
1170                                .ok_or("col not found")?;
1171                            let sum: f64 = rows
1172                                .iter()
1173                                .filter_map(|r| match &r[idx] {
1174                                    Value::Int(v) => Some(*v as f64),
1175                                    Value::Float(v) => Some(*v),
1176                                    _ => None,
1177                                })
1178                                .sum();
1179                            let count = rows.len() as f64;
1180                            Ok(QueryResult::Scalar(Value::Float(sum / count)))
1181                        }
1182                        AggFunc::Sum => {
1183                            let col = field.as_ref().ok_or("sum requires field")?;
1184                            let idx = columns
1185                                .iter()
1186                                .position(|c| c == col)
1187                                .ok_or("col not found")?;
1188                            let mut int_sum: i64 = 0;
1189                            let mut float_sum: f64 = 0.0;
1190                            let mut saw_float = false;
1191                            for r in &rows {
1192                                match &r[idx] {
1193                                    Value::Int(v) => int_sum += *v,
1194                                    Value::Float(v) => {
1195                                        float_sum += *v;
1196                                        saw_float = true;
1197                                    }
1198                                    _ => {}
1199                                }
1200                            }
1201                            let result = if saw_float {
1202                                Value::Float(float_sum + int_sum as f64)
1203                            } else {
1204                                Value::Int(int_sum)
1205                            };
1206                            Ok(QueryResult::Scalar(result))
1207                        }
1208                        AggFunc::Min | AggFunc::Max => {
1209                            let col = field.as_ref().ok_or("min/max requires field")?;
1210                            let idx = columns
1211                                .iter()
1212                                .position(|c| c == col)
1213                                .ok_or("col not found")?;
1214                            let vals: Vec<&Value> = rows.iter().map(|r| &r[idx]).collect();
1215                            let result = if *function == AggFunc::Min {
1216                                vals.into_iter().min().cloned()
1217                            } else {
1218                                vals.into_iter().max().cloned()
1219                            };
1220                            Ok(QueryResult::Scalar(result.unwrap_or(Value::Empty)))
1221                        }
1222                    },
1223                    _ => Err("aggregate requires row input".into()),
1224                }
1225            }
1226
1227            PlanNode::Distinct { input } => {
1228                let result = self.execute_plan_readonly(input)?;
1229                match result {
1230                    QueryResult::Rows { columns, rows } => {
1231                        let mut seen = std::collections::HashSet::new();
1232                        let mut unique_rows = Vec::new();
1233                        for row in rows {
1234                            if seen.insert(row.clone()) {
1235                                unique_rows.push(row);
1236                            }
1237                        }
1238                        Ok(QueryResult::Rows {
1239                            columns,
1240                            rows: unique_rows,
1241                        })
1242                    }
1243                    other => Ok(other),
1244                }
1245            }
1246
1247            PlanNode::GroupBy {
1248                input,
1249                keys,
1250                aggregates,
1251                having,
1252            } => {
1253                let result = self.execute_plan_readonly(input)?;
1254                match result {
1255                    QueryResult::Rows { columns, rows } => {
1256                        let key_indices: Vec<usize> = keys
1257                            .iter()
1258                            .map(|k| {
1259                                columns
1260                                    .iter()
1261                                    .position(|c| c == k)
1262                                    .ok_or_else(|| format!("group-by column '{k}' not found"))
1263                            })
1264                            .collect::<Result<Vec<_>, _>>()?;
1265
1266                        let agg_field_indices: Vec<usize> = aggregates
1267                            .iter()
1268                            .map(|a| {
1269                                if a.field == "*" {
1270                                    Ok(usize::MAX)
1271                                } else {
1272                                    columns.iter().position(|c| c == &a.field).ok_or_else(|| {
1273                                        format!("aggregate column '{}' not found", a.field)
1274                                    })
1275                                }
1276                            })
1277                            .collect::<Result<Vec<_>, _>>()?;
1278
1279                        let mut group_map: rustc_hash::FxHashMap<Vec<Value>, usize> =
1280                            rustc_hash::FxHashMap::default();
1281                        let mut groups: Vec<(Vec<Value>, Vec<usize>)> = Vec::new();
1282                        for (ri, row) in rows.iter().enumerate() {
1283                            let key: Vec<Value> =
1284                                key_indices.iter().map(|&i| row[i].clone()).collect();
1285                            match group_map.get(&key) {
1286                                Some(&idx) => groups[idx].1.push(ri),
1287                                None => {
1288                                    let idx = groups.len();
1289                                    group_map.insert(key.clone(), idx);
1290                                    groups.push((key, vec![ri]));
1291                                }
1292                            }
1293                        }
1294
1295                        let mut out_columns: Vec<String> = keys.clone();
1296                        for agg in aggregates.iter() {
1297                            out_columns.push(agg.output_name.clone());
1298                        }
1299
1300                        let mut out_rows: Vec<Vec<Value>> = Vec::with_capacity(groups.len());
1301                        for (key_vals, row_indices) in &groups {
1302                            let mut row = key_vals.clone();
1303                            for (ai, agg) in aggregates.iter().enumerate() {
1304                                let col_idx = agg_field_indices[ai];
1305                                let val = compute_group_aggregate(
1306                                    agg.function,
1307                                    &rows,
1308                                    row_indices,
1309                                    col_idx,
1310                                );
1311                                row.push(val);
1312                            }
1313                            out_rows.push(row);
1314                        }
1315
1316                        if let Some(having_expr) = having {
1317                            out_rows.retain(|row| eval_predicate(having_expr, row, &out_columns));
1318                        }
1319
1320                        Ok(QueryResult::Rows {
1321                            columns: out_columns,
1322                            rows: out_rows,
1323                        })
1324                    }
1325                    _ => Err("group by requires row input".into()),
1326                }
1327            }
1328
1329            PlanNode::NestedLoopJoin {
1330                left,
1331                right,
1332                on,
1333                kind,
1334            } => {
1335                let left_result = self.execute_plan_readonly(left)?;
1336                let right_result = self.execute_plan_readonly(right)?;
1337                let (left_columns, left_rows) = match left_result {
1338                    QueryResult::Rows { columns, rows } => (columns, rows),
1339                    _ => return Err("join left side must produce rows".into()),
1340                };
1341                let (right_columns, right_rows) = match right_result {
1342                    QueryResult::Rows { columns, rows } => (columns, rows),
1343                    _ => return Err("join right side must produce rows".into()),
1344                };
1345
1346                if !matches!(kind, JoinKind::Cross) {
1347                    if let Some(pred) = on {
1348                        if let Some((l_idx, r_idx)) =
1349                            try_extract_equi_join_keys(pred, &left_columns, &right_columns)
1350                        {
1351                            return Ok(hash_join(
1352                                left_columns,
1353                                left_rows,
1354                                right_columns,
1355                                right_rows,
1356                                l_idx,
1357                                r_idx,
1358                                *kind,
1359                            ));
1360                        }
1361                    }
1362                }
1363
1364                let n_left = left_columns.len();
1365                let n_right = right_columns.len();
1366                let mut columns = Vec::with_capacity(n_left + n_right);
1367                columns.extend(left_columns);
1368                columns.extend(right_columns);
1369
1370                let mut rows: Vec<Vec<Value>> = Vec::with_capacity(left_rows.len());
1371                let mut combined: Vec<Value> = Vec::with_capacity(n_left + n_right);
1372
1373                for left_row in &left_rows {
1374                    let mut matched = false;
1375                    for right_row in &right_rows {
1376                        combined.clear();
1377                        combined.extend_from_slice(left_row);
1378                        combined.extend_from_slice(right_row);
1379                        let keep = match kind {
1380                            JoinKind::Cross => true,
1381                            JoinKind::Inner | JoinKind::LeftOuter => match on {
1382                                Some(pred) => eval_predicate(pred, &combined, &columns),
1383                                None => true,
1384                            },
1385                            JoinKind::RightOuter => {
1386                                unreachable!("planner rewrites RightOuter to LeftOuter")
1387                            }
1388                        };
1389                        if keep {
1390                            rows.push(combined.clone());
1391                            matched = true;
1392                        }
1393                    }
1394                    if !matched && matches!(kind, JoinKind::LeftOuter) {
1395                        let mut row = Vec::with_capacity(n_left + n_right);
1396                        row.extend_from_slice(left_row);
1397                        row.resize(n_left + n_right, Value::Empty);
1398                        rows.push(row);
1399                    }
1400                }
1401
1402                Ok(QueryResult::Rows { columns, rows })
1403            }
1404
1405            PlanNode::Window { input, windows } => {
1406                let result = self.execute_plan_readonly(input)?;
1407                execute_window(result, windows)
1408            }
1409
1410            PlanNode::Union { left, right, all } => {
1411                let left_result = self.execute_plan_readonly(left)?;
1412                let right_result = self.execute_plan_readonly(right)?;
1413                let (left_cols, left_rows) = match left_result {
1414                    QueryResult::Rows { columns, rows } => (columns, rows),
1415                    _ => return Err("UNION requires query results on left side".into()),
1416                };
1417                let (_, right_rows) = match right_result {
1418                    QueryResult::Rows { columns, rows } => (columns, rows),
1419                    _ => return Err("UNION requires query results on right side".into()),
1420                };
1421                let mut combined = left_rows;
1422                if *all {
1423                    combined.extend(right_rows);
1424                } else {
1425                    let mut seen = std::collections::HashSet::new();
1426                    for row in &combined {
1427                        seen.insert(row.clone());
1428                    }
1429                    for row in right_rows {
1430                        if seen.insert(row.clone()) {
1431                            combined.push(row);
1432                        }
1433                    }
1434                }
1435                Ok(QueryResult::Rows {
1436                    columns: left_cols,
1437                    rows: combined,
1438                })
1439            }
1440
1441            PlanNode::Explain { input } => {
1442                let text = format_plan_tree(input, 0);
1443                Ok(QueryResult::Rows {
1444                    columns: vec!["plan".to_string()],
1445                    rows: text
1446                        .lines()
1447                        .map(|line| vec![Value::Str(line.to_string())])
1448                        .collect(),
1449                })
1450            }
1451
1452            // All write variants — caller must escalate to the write lock.
1453            PlanNode::Insert { .. }
1454            | PlanNode::Update { .. }
1455            | PlanNode::Delete { .. }
1456            | PlanNode::Upsert { .. }
1457            | PlanNode::CreateTable { .. }
1458            | PlanNode::AlterTable { .. }
1459            | PlanNode::DropTable { .. }
1460            | PlanNode::CreateView { .. }
1461            | PlanNode::RefreshView { .. }
1462            | PlanNode::DropView { .. } => Err(READONLY_NEEDS_WRITE.to_string()),
1463        }
1464    }
1465
1466    /// `&self` variant of [`Engine::materialize_subqueries`]. Used by the
1467    /// read path so `Filter` predicates with `InSubquery`/`ExistsSubquery`
1468    /// children can evaluate their inner queries without taking the write
1469    /// lock. Inner queries that would themselves need a write (e.g. dirty
1470    /// view) escalate via [`READONLY_NEEDS_WRITE`] just like the top-level
1471    /// read path does.
1472    fn materialize_subqueries_readonly(&self, expr: &Expr) -> Result<Expr, String> {
1473        match expr {
1474            Expr::InSubquery {
1475                expr: inner,
1476                subquery,
1477                negated,
1478            } => {
1479                if is_correlated_subquery(subquery, &self.catalog) {
1480                    // Pass through — will be materialized per-row in the
1481                    // Filter handler's correlated subquery path.
1482                    let inner = self.materialize_subqueries_readonly(inner)?;
1483                    return Ok(Expr::InSubquery {
1484                        expr: Box::new(inner),
1485                        subquery: subquery.clone(),
1486                        negated: *negated,
1487                    });
1488                }
1489                let inner = self.materialize_subqueries_readonly(inner)?;
1490                let sub_plan = crate::planner::plan_statement(Statement::Query(*subquery.clone()))
1491                    .map_err(|e| e.to_string())?;
1492                let result = self.execute_plan_readonly(&sub_plan)?;
1493                let values = match result {
1494                    QueryResult::Rows { rows, .. } => rows
1495                        .into_iter()
1496                        .filter_map(|mut row| {
1497                            if row.is_empty() {
1498                                None
1499                            } else {
1500                                Some(value_to_expr(row.swap_remove(0)))
1501                            }
1502                        })
1503                        .collect(),
1504                    _ => Vec::new(),
1505                };
1506                Ok(Expr::InList {
1507                    expr: Box::new(inner),
1508                    list: values,
1509                    negated: *negated,
1510                })
1511            }
1512            Expr::ExistsSubquery { subquery, negated } => {
1513                if is_correlated_subquery(subquery, &self.catalog) {
1514                    return Ok(expr.clone());
1515                }
1516                let sub_plan = crate::planner::plan_statement(Statement::Query(*subquery.clone()))
1517                    .map_err(|e| e.to_string())?;
1518                let result = self.execute_plan_readonly(&sub_plan)?;
1519                let has_rows = match result {
1520                    QueryResult::Rows { rows, .. } => !rows.is_empty(),
1521                    _ => false,
1522                };
1523                let truth = if *negated { !has_rows } else { has_rows };
1524                Ok(Expr::Literal(Literal::Bool(truth)))
1525            }
1526            Expr::BinaryOp(l, op, r) => {
1527                let l = self.materialize_subqueries_readonly(l)?;
1528                let r = self.materialize_subqueries_readonly(r)?;
1529                Ok(Expr::BinaryOp(Box::new(l), *op, Box::new(r)))
1530            }
1531            Expr::UnaryOp(op, inner) => {
1532                let inner = self.materialize_subqueries_readonly(inner)?;
1533                Ok(Expr::UnaryOp(*op, Box::new(inner)))
1534            }
1535            Expr::Case { whens, else_expr } => {
1536                let whens = whens
1537                    .iter()
1538                    .map(|(c, r)| {
1539                        let c = self.materialize_subqueries_readonly(c)?;
1540                        let r = self.materialize_subqueries_readonly(r)?;
1541                        Ok((Box::new(c), Box::new(r)))
1542                    })
1543                    .collect::<Result<Vec<_>, String>>()?;
1544                let else_expr = match else_expr {
1545                    Some(e) => Some(Box::new(self.materialize_subqueries_readonly(e)?)),
1546                    None => None,
1547                };
1548                Ok(Expr::Case { whens, else_expr })
1549            }
1550            other => Ok(other.clone()),
1551        }
1552    }
1553
1554    /// Per-row materialisation of correlated subqueries. For each row in the
1555    /// outer query, substitute outer column references in the subquery's
1556    /// filter with the current row's literal values, execute the modified
1557    /// subquery, and return the result as an InList or Bool literal.
1558    fn materialize_correlated_for_row_readonly(
1559        &self,
1560        expr: &Expr,
1561        outer_row: &[Value],
1562        outer_columns: &[String],
1563    ) -> Result<Expr, String> {
1564        match expr {
1565            Expr::InSubquery {
1566                expr: inner,
1567                subquery,
1568                negated,
1569            } => {
1570                let inner =
1571                    self.materialize_correlated_for_row_readonly(inner, outer_row, outer_columns)?;
1572                let mut sub = *subquery.clone();
1573                if let Some(ref filter) = sub.filter {
1574                    sub.filter = Some(substitute_outer_refs(
1575                        filter,
1576                        &sub.source,
1577                        &self.catalog,
1578                        outer_row,
1579                        outer_columns,
1580                    ));
1581                }
1582                let sub_plan = crate::planner::plan_statement(Statement::Query(sub))
1583                    .map_err(|e| e.to_string())?;
1584                let result = self.execute_plan_readonly(&sub_plan)?;
1585                let values = match result {
1586                    QueryResult::Rows { rows, .. } => rows
1587                        .into_iter()
1588                        .filter_map(|mut row| {
1589                            if row.is_empty() {
1590                                None
1591                            } else {
1592                                Some(value_to_expr(row.swap_remove(0)))
1593                            }
1594                        })
1595                        .collect(),
1596                    _ => Vec::new(),
1597                };
1598                Ok(Expr::InList {
1599                    expr: Box::new(inner),
1600                    list: values,
1601                    negated: *negated,
1602                })
1603            }
1604            Expr::ExistsSubquery { subquery, negated } => {
1605                let mut sub = *subquery.clone();
1606                if let Some(ref filter) = sub.filter {
1607                    sub.filter = Some(substitute_outer_refs(
1608                        filter,
1609                        &sub.source,
1610                        &self.catalog,
1611                        outer_row,
1612                        outer_columns,
1613                    ));
1614                }
1615                let sub_plan = crate::planner::plan_statement(Statement::Query(sub))
1616                    .map_err(|e| e.to_string())?;
1617                let result = self.execute_plan_readonly(&sub_plan)?;
1618                let has_rows = match result {
1619                    QueryResult::Rows { rows, .. } => !rows.is_empty(),
1620                    _ => false,
1621                };
1622                let truth = if *negated { !has_rows } else { has_rows };
1623                Ok(Expr::Literal(Literal::Bool(truth)))
1624            }
1625            Expr::BinaryOp(l, op, r) => {
1626                let l =
1627                    self.materialize_correlated_for_row_readonly(l, outer_row, outer_columns)?;
1628                let r =
1629                    self.materialize_correlated_for_row_readonly(r, outer_row, outer_columns)?;
1630                Ok(Expr::BinaryOp(Box::new(l), *op, Box::new(r)))
1631            }
1632            Expr::UnaryOp(op, inner) => {
1633                let inner =
1634                    self.materialize_correlated_for_row_readonly(inner, outer_row, outer_columns)?;
1635                Ok(Expr::UnaryOp(*op, Box::new(inner)))
1636            }
1637            other => Ok(other.clone()),
1638        }
1639    }
1640
1641    /// Parse and plan a query once, returning a [`PreparedQuery`] handle
1642    /// the caller can execute repeatedly with fresh literal values.
1643    ///
1644    /// Mission C Phase 5: the plan cache already short-circuits repeat
1645    /// queries that share a shape, but every call still pays for
1646    /// `canonicalize` (lex + FNV hash) and a hashmap lookup. For a tight
1647    /// insert loop that's ~500-800ns of pure overhead per call on top of
1648    /// the caller's `format!()` cost. Prepared statements skip the lex,
1649    /// skip the hash, skip the format, and skip the cache lookup — the
1650    /// caller holds the plan template directly and hands us the new
1651    /// literals as a slice.
1652    ///
1653    /// The plan template holds whatever literal values the original query
1654    /// string contained; those are overwritten on every `execute_prepared`
1655    /// call, same way the plan cache does on a cache hit.
1656    ///
1657    /// The returned `param_count` matches the total number of
1658    /// `Expr::Literal` slots reachable from the plan, in the deterministic
1659    /// walk order used by `canonicalize` and the cache. Callers must pass
1660    /// exactly that many literals to `execute_prepared`, in the same order
1661    /// they appear in the source text.
1662    pub fn prepare(&mut self, query: &str) -> Result<PreparedQuery, String> {
1663        let plan = planner::plan(query).map_err(|e| e.to_string())?;
1664        let param_count = crate::plan_cache::count_literal_slots(&plan);
1665
1666        // Insert fast path: if the template is Insert and every assignment
1667        // RHS is a literal, resolve column indices once here and store
1668        // them. execute_prepared will skip the plan-clone + substitute
1669        // walk on this path.
1670        //
1671        // Mission C Phase 15: also cache `n_cols` and the target table
1672        // name so execute_prepared doesn't need a second HashMap lookup
1673        // on `self.catalog.schema(table)` just to size the scratch Vec.
1674        let insert_fast = match &plan {
1675            PlanNode::Insert { table, assignments }
1676                if assignments
1677                    .iter()
1678                    .all(|a| matches!(a.value, Expr::Literal(_)))
1679                    && param_count == assignments.len() =>
1680            {
1681                let table_slot = self
1682                    .catalog
1683                    .table_slot(table)
1684                    .ok_or_else(|| format!("table '{table}' not found"))?;
1685                let schema = &self.catalog.table_by_slot(table_slot).schema;
1686                let n_cols = schema.columns.len();
1687                let indices: Result<Vec<usize>, String> = assignments
1688                    .iter()
1689                    .map(|a| {
1690                        schema
1691                            .column_index(&a.field)
1692                            .ok_or_else(|| format!("column '{}' not found", a.field))
1693                    })
1694                    .collect();
1695                Some(InsertFast {
1696                    table_slot,
1697                    col_indices: indices?,
1698                    n_cols,
1699                })
1700            }
1701            _ => None,
1702        };
1703
1704        // Mission C Phase 14: update-by-pk fast path. Match on the shape
1705        // planner::plan_update builds for `T filter .pk = ? update
1706        // { col := ? }` — `Update { input: IndexScan(pk), assignments:
1707        // [{col, Literal}] }` — and only if every precondition holds:
1708        //   * `pk` is an indexed column (so the executor would take the
1709        //     btree.lookup path at run time regardless)
1710        //   * there's exactly one assignment
1711        //   * the assigned column is fixed-size and *not* indexed (so we
1712        //     don't have to maintain any secondary index on write)
1713        //   * both literal slots are already `Expr::Literal` (no computed
1714        //     expressions)
1715        // If any of these fail we fall through to the standard substitute
1716        // + execute path.
1717        let update_pk_fast = Self::try_build_update_pk_fast(&self.catalog, &plan);
1718
1719        Ok(PreparedQuery {
1720            plan_template: plan,
1721            param_count,
1722            insert_fast,
1723            update_pk_fast,
1724        })
1725    }
1726
1727    /// Mission C Phase 14: inspect a planned tree and, if it matches the
1728    /// `update_by_pk` fast-path shape, return the precomputed byte-patch
1729    /// metadata. Returns `None` on any mismatch — the caller falls through
1730    /// to the substitute-and-execute path, which is always correct.
1731    fn try_build_update_pk_fast(catalog: &Catalog, plan: &PlanNode) -> Option<UpdatePkFast> {
1732        // Top level must be `Update { input: IndexScan(...), ... }`.
1733        let (table, input, assignments) = match plan {
1734            PlanNode::Update {
1735                table,
1736                input,
1737                assignments,
1738            } => (table, input.as_ref(), assignments),
1739            _ => return None,
1740        };
1741        // Exactly one assignment — the bench hot path and the only case
1742        // where a single byte-patch covers the whole mutation.
1743        if assignments.len() != 1 {
1744            return None;
1745        }
1746        let assn = &assignments[0];
1747        // Assignment RHS must be a raw literal, not a computed expr.
1748        if !matches!(assn.value, Expr::Literal(_)) {
1749            return None;
1750        }
1751        // Input must be an IndexScan on the same table with a literal key.
1752        let (key_col, key_table) = match input {
1753            PlanNode::IndexScan {
1754                table: t,
1755                column,
1756                key: Expr::Literal(_),
1757            } => (column.clone(), t.clone()),
1758            _ => return None,
1759        };
1760        if &key_table != table {
1761            return None;
1762        }
1763
1764        // Look up schema + index state from the live catalog, caching
1765        // the slot so the execute path skips the name probe.
1766        let table_slot = catalog.table_slot(table)?;
1767        let tbl = catalog.table_by_slot(table_slot);
1768        let schema = &tbl.schema;
1769
1770        // Key column must have an index (the btree.lookup path is what
1771        // makes the fast path worth building).
1772        if !tbl.has_index(&key_col) {
1773            return None;
1774        }
1775
1776        // Target column must exist, be fixed-size, and NOT be indexed (so
1777        // we don't have to maintain any secondary index here).
1778        let target_col_idx = schema.column_index(&assn.field)?;
1779        let target_type = schema.columns[target_col_idx].type_id;
1780        if !is_fixed_size(target_type) {
1781            return None;
1782        }
1783        if tbl.has_indexed_col(target_col_idx) {
1784            return None;
1785        }
1786
1787        // Precompute byte offsets from the cached row layout.
1788        let layout = tbl.row_layout();
1789        let fixed_off = layout.fixed_offset(target_col_idx)?;
1790        let bitmap_size = layout.bitmap_size();
1791        let field_off = 2 + bitmap_size + fixed_off;
1792        let bitmap_byte_off = 2 + target_col_idx / 8;
1793        let bit_mask = 1u8 << (target_col_idx % 8);
1794
1795        // Literal walk order for `Update { IndexScan(key), [{value}] }`
1796        // (see `plan_cache::substitute_plan` — input first, then the
1797        // assignments). The filter key is literal 0, the assignment RHS
1798        // is literal 1.
1799        Some(UpdatePkFast {
1800            table_slot,
1801            key_col,
1802            field_off,
1803            bitmap_byte_off,
1804            bit_mask,
1805            target_type,
1806            key_literal_idx: 0,
1807            value_literal_idx: 1,
1808        })
1809    }
1810
1811    /// Execute a [`PreparedQuery`] with the given literal values.
1812    ///
1813    /// The literals are substituted into a clone of the template plan in
1814    /// the same deterministic walk order that [`crate::canonicalize`]
1815    /// produces (filter predicate first, then projection, then assignment
1816    /// RHS, and so on). Substitution errors here mean the caller passed
1817    /// the wrong number of literals for this query shape.
1818    pub fn execute_prepared(
1819        &mut self,
1820        prep: &PreparedQuery,
1821        literals: &[Literal],
1822    ) -> Result<QueryResult, String> {
1823        if literals.len() != prep.param_count {
1824            return Err(format!(
1825                "prepared query expects {} literal(s), got {}",
1826                prep.param_count,
1827                literals.len(),
1828            ));
1829        }
1830
1831        // Mission C Phase 14: update-by-pk fast path. Skip plan clone,
1832        // substitute walk, resolved_assignments, FastPatch, Vec<RowId>,
1833        // RowLayout::new — straight to btree.lookup_int + byte patch.
1834        // On rare mismatches (wrong literal type, index dropped after
1835        // prepare) the helper returns `Ok(None)` and we fall through to
1836        // the generic substitute-and-execute path below.
1837        if let Some(fast) = &prep.update_pk_fast {
1838            if let Some(result) = self.try_execute_update_pk_fast(fast, literals)? {
1839                // Mark dependent views dirty for prepared update fast path.
1840                if let PlanNode::Update { table, .. } = &prep.plan_template {
1841                    self.view_registry.mark_dependents_dirty(table);
1842                }
1843                // Mission B (post-review): statement-boundary WAL group
1844                // commit. The fast path appended an Update record but did
1845                // not flush — flush it now so the executor's contract is
1846                // "WAL is on disk before this returns".
1847                self.catalog.sync_wal().map_err(|e| e.to_string())?;
1848                return Ok(result);
1849            }
1850        }
1851
1852        // Insert fast path: skip plan-clone + substitute walk + PlanNode::Insert
1853        // arm's column-index resolution. Build the Row directly from the
1854        // caller's literal slice using indices we resolved at prepare time.
1855        // Saves ~300-500ns per insert on the bench.
1856        //
1857        // Mission C Phase 13: the scratch `Vec<Value>` is reused across
1858        // calls — no fresh allocation per insert. We split the borrow
1859        // between `self.catalog` and `self.insert_values_scratch` by
1860        // moving the scratch into a local, filling it, passing to the
1861        // catalog, and putting it back.
1862        //
1863        // Mission C Phase 15: the cached `InsertFast` carries `n_cols`
1864        // and the table name, so the hot path makes exactly one catalog
1865        // HashMap lookup (`get_table_mut`) and dispatches straight into
1866        // `tbl.insert` — no intermediate schema lookup, no generic
1867        // `Catalog::insert` wrapper.
1868        if let Some(fast) = &prep.insert_fast {
1869            let mut values = std::mem::take(&mut self.insert_values_scratch);
1870            values.clear();
1871            values.resize(fast.n_cols, Value::Empty);
1872            for (pos, lit) in literals.iter().enumerate() {
1873                values[fast.col_indices[pos]] = literal_value_from(lit);
1874            }
1875            // Mission C Phase 18: direct O(1) slot index — no
1876            // catalog hash probe. Slot was resolved at prepare time.
1877            let tbl = self.catalog.table_by_slot_mut(fast.table_slot);
1878            let res = tbl.insert(&values).map_err(|e| e.to_string());
1879            // Clear strings before returning the scratch — don't keep
1880            // dangling allocations from the previous row alive across
1881            // calls. `clear()` drops the Value::Str entries.
1882            values.clear();
1883            self.insert_values_scratch = values;
1884            res?;
1885            // Mark dependent views dirty for prepared insert fast path.
1886            if let PlanNode::Insert { table, .. } = &prep.plan_template {
1887                self.view_registry.mark_dependents_dirty(table);
1888            }
1889            // Mission B (post-review): statement-boundary WAL group commit.
1890            self.catalog.sync_wal().map_err(|e| e.to_string())?;
1891            return Ok(QueryResult::Modified(1));
1892        }
1893
1894        let mut plan = prep.plan_template.clone();
1895        let mut idx = 0usize;
1896        crate::plan_cache::substitute_plan(&mut plan, literals, &mut idx);
1897        debug_assert_eq!(idx, literals.len());
1898        let result = self.execute_plan(&plan);
1899        // Mission B (post-review): statement-boundary WAL group commit.
1900        // No-op when nothing was buffered (read-only plans).
1901        self.catalog.sync_wal().map_err(|e| e.to_string())?;
1902        result
1903    }
1904
1905    /// Mission C Phase 14: point-update fast path for prepared
1906    /// `T filter .pk = ? update { col := ? }` queries. The caller has
1907    /// already verified this is an int-indexed pk with a fixed-size,
1908    /// non-indexed target column; all we do here is pluck the two
1909    /// literals out of the caller's slice, run one `btree.lookup_int`,
1910    /// and patch 1–8 bytes of the row. No plan clone, no allocations.
1911    ///
1912    /// Returns:
1913    ///   * `Ok(Some(result))` — fast path took the mutation.
1914    ///   * `Ok(None)` — can't take the fast path this call (wrong
1915    ///     literal type, index dropped since prepare, etc.). Caller
1916    ///     falls through to the generic substitute-and-execute path.
1917    ///   * `Err(_)` — real error (table gone, I/O, etc.).
1918    #[inline]
1919    fn try_execute_update_pk_fast(
1920        &mut self,
1921        fast: &UpdatePkFast,
1922        literals: &[Literal],
1923    ) -> Result<Option<QueryResult>, String> {
1924        // 1) Extract the key literal. The fast path is only built for
1925        //    int key columns; any other literal type means the caller
1926        //    is violating the prepared-query contract or the schema
1927        //    changed — either way, fall back.
1928        let key_int = match &literals[fast.key_literal_idx] {
1929            Literal::Int(v) => *v,
1930            _ => return Ok(None),
1931        };
1932
1933        // 2) Encode the new value as little-endian bytes matching the
1934        //    target column's fixed encoding.
1935        let bytes: FixedBytes = match (fast.target_type, &literals[fast.value_literal_idx]) {
1936            (TypeId::Int, Literal::Int(v)) => FixedBytes::I64(v.to_le_bytes()),
1937            (TypeId::DateTime, Literal::Int(v)) => FixedBytes::I64(v.to_le_bytes()),
1938            (TypeId::Float, Literal::Float(v)) => FixedBytes::F64(v.to_le_bytes()),
1939            (TypeId::Bool, Literal::Bool(v)) => FixedBytes::Bool(if *v { 1 } else { 0 }),
1940            // Type mismatch — fall back to the generic path for a
1941            // consistent error shape.
1942            _ => return Ok(None),
1943        };
1944
1945        // 3) Look up the table + btree, do the int lookup, patch the row
1946        //    in place. Phase 18: table dispatch is a direct slot index;
1947        //    the btree lookup is the linear scan over `indexed_cols`.
1948        //    Single btree.lookup_int + one `with_row_bytes_mut` call.
1949        //    No Vec allocations at all.
1950        //
1951        // Mission B2: route the in-place patch through the catalog's
1952        // WAL-logged wrapper so crash recovery sees the update. The
1953        // extra cost is one WAL append + fsync per query — the hot
1954        // loop structure is unchanged.
1955        let tbl = self.catalog.table_by_slot_mut(fast.table_slot);
1956        let Some(btree) = tbl.index(&fast.key_col) else {
1957            // Index dropped since prepare — bail to the generic path.
1958            return Ok(None);
1959        };
1960        let Some(rid) = btree.lookup_int(key_int) else {
1961            return Ok(Some(QueryResult::Modified(0)));
1962        };
1963
1964        let fast_table_slot = fast.table_slot;
1965        let bitmap_byte_off = fast.bitmap_byte_off;
1966        let bit_mask = fast.bit_mask;
1967        let field_off = fast.field_off;
1968        let ok = self
1969            .catalog
1970            .update_row_bytes_logged_by_slot(fast_table_slot, rid, |row| {
1971                // Idempotent null-bit clear — safe even when the column was
1972                // already non-null (the overwhelmingly common case).
1973                row[bitmap_byte_off] &= !bit_mask;
1974                let field_bytes = bytes.as_slice();
1975                row[field_off..field_off + field_bytes.len()].copy_from_slice(field_bytes);
1976            })
1977            .map_err(|e| e.to_string())?;
1978
1979        Ok(Some(QueryResult::Modified(if ok { 1 } else { 0 })))
1980    }
1981
1982    /// Mission C Phase 13: moving variant of [`Engine::execute_prepared`]
1983    /// for the insert fast path. Takes `literals` by mutable reference
1984    /// so that each `Literal::String` can be consumed via `mem::take`
1985    /// instead of cloned into a `Value::Str`. On `insert_batch_1k` that
1986    /// removes three per-row heap allocations (name, status, email),
1987    /// bringing the workload over the line vs SQLite's amortized
1988    /// prepare+execute loop.
1989    ///
1990    /// The caller's `Literal::String` entries are replaced with empty
1991    /// strings on successful inserts — the `literals` slice is *not*
1992    /// left in a valid-for-reuse state except for `Int`/`Float`/`Bool`
1993    /// values. Non-insert templates fall through to the standard
1994    /// substitute-and-execute path.
1995    pub fn execute_prepared_take(
1996        &mut self,
1997        prep: &PreparedQuery,
1998        literals: &mut [Literal],
1999    ) -> Result<QueryResult, String> {
2000        if literals.len() != prep.param_count {
2001            return Err(format!(
2002                "prepared query expects {} literal(s), got {}",
2003                prep.param_count,
2004                literals.len(),
2005            ));
2006        }
2007
2008        if let Some(fast) = &prep.insert_fast {
2009            let mut values = std::mem::take(&mut self.insert_values_scratch);
2010            values.clear();
2011            values.resize(fast.n_cols, Value::Empty);
2012            for (pos, lit) in literals.iter_mut().enumerate() {
2013                values[fast.col_indices[pos]] = literal_value_take(lit);
2014            }
2015            // Mission C Phase 18: direct O(1) slot index — see
2016            // `execute_prepared` for rationale. This is the hot path
2017            // for `insert_batch_1k`.
2018            let tbl = self.catalog.table_by_slot_mut(fast.table_slot);
2019            let res = tbl.insert(&values).map_err(|e| e.to_string());
2020            values.clear();
2021            self.insert_values_scratch = values;
2022            res?;
2023            // Mission B (post-review): statement-boundary WAL group commit.
2024            self.catalog.sync_wal().map_err(|e| e.to_string())?;
2025            return Ok(QueryResult::Modified(1));
2026        }
2027
2028        // Non-insert templates — fall back to the standard path. We
2029        // can't usefully move the literals because `substitute_plan`
2030        // still expects an immutable slice, and the non-insert hot
2031        // paths are dominated by plan walks anyway.
2032        self.execute_prepared(prep, literals)
2033    }
2034
2035    /// Walk an expression tree and replace every `InSubquery` node with
2036    /// an `InList` by executing the subquery and collecting its first
2037    /// column as literal values. This must be called before entering
2038    /// the row-by-row scan loop because the scan closure can't call back
2039    /// into the engine.
2040    fn materialize_subqueries(&mut self, expr: &Expr) -> Result<Expr, String> {
2041        match expr {
2042            Expr::InSubquery {
2043                expr: inner,
2044                subquery,
2045                negated,
2046            } => {
2047                if is_correlated_subquery(subquery, &self.catalog) {
2048                    let inner = self.materialize_subqueries(inner)?;
2049                    return Ok(Expr::InSubquery {
2050                        expr: Box::new(inner),
2051                        subquery: subquery.clone(),
2052                        negated: *negated,
2053                    });
2054                }
2055                let inner = self.materialize_subqueries(inner)?;
2056                // Plan and execute the subquery.
2057                let sub_plan = crate::planner::plan_statement(Statement::Query(*subquery.clone()))
2058                    .map_err(|e| e.to_string())?;
2059                let result = self.execute_plan(&sub_plan)?;
2060                let values = match result {
2061                    QueryResult::Rows { rows, .. } => rows
2062                        .into_iter()
2063                        .filter_map(|mut row| {
2064                            if row.is_empty() {
2065                                None
2066                            } else {
2067                                Some(value_to_expr(row.swap_remove(0)))
2068                            }
2069                        })
2070                        .collect(),
2071                    _ => Vec::new(),
2072                };
2073                Ok(Expr::InList {
2074                    expr: Box::new(inner),
2075                    list: values,
2076                    negated: *negated,
2077                })
2078            }
2079            Expr::ExistsSubquery { subquery, negated } => {
2080                if is_correlated_subquery(subquery, &self.catalog) {
2081                    return Ok(expr.clone());
2082                }
2083                // Uncorrelated EXISTS: run the subquery once and collapse
2084                // into a Bool literal.
2085                let sub_plan = crate::planner::plan_statement(Statement::Query(*subquery.clone()))
2086                    .map_err(|e| e.to_string())?;
2087                let result = self.execute_plan(&sub_plan)?;
2088                let has_rows = match result {
2089                    QueryResult::Rows { rows, .. } => !rows.is_empty(),
2090                    _ => false,
2091                };
2092                let truth = if *negated { !has_rows } else { has_rows };
2093                Ok(Expr::Literal(Literal::Bool(truth)))
2094            }
2095            Expr::BinaryOp(l, op, r) => {
2096                let l = self.materialize_subqueries(l)?;
2097                let r = self.materialize_subqueries(r)?;
2098                Ok(Expr::BinaryOp(Box::new(l), *op, Box::new(r)))
2099            }
2100            Expr::UnaryOp(op, inner) => {
2101                let inner = self.materialize_subqueries(inner)?;
2102                Ok(Expr::UnaryOp(*op, Box::new(inner)))
2103            }
2104            Expr::Case { whens, else_expr } => {
2105                let whens = whens
2106                    .iter()
2107                    .map(|(c, r)| {
2108                        let c = self.materialize_subqueries(c)?;
2109                        let r = self.materialize_subqueries(r)?;
2110                        Ok((Box::new(c), Box::new(r)))
2111                    })
2112                    .collect::<Result<Vec<_>, String>>()?;
2113                let else_expr = match else_expr {
2114                    Some(e) => Some(Box::new(self.materialize_subqueries(e)?)),
2115                    None => None,
2116                };
2117                Ok(Expr::Case { whens, else_expr })
2118            }
2119            // Leaf nodes: no subqueries possible.
2120            other => Ok(other.clone()),
2121        }
2122    }
2123
2124    /// Write-path per-row materialisation of correlated subqueries.
2125    fn materialize_correlated_for_row(
2126        &mut self,
2127        expr: &Expr,
2128        outer_row: &[Value],
2129        outer_columns: &[String],
2130    ) -> Result<Expr, String> {
2131        match expr {
2132            Expr::InSubquery {
2133                expr: inner,
2134                subquery,
2135                negated,
2136            } => {
2137                let inner = self.materialize_correlated_for_row(inner, outer_row, outer_columns)?;
2138                let mut sub = *subquery.clone();
2139                if let Some(ref filter) = sub.filter {
2140                    sub.filter = Some(substitute_outer_refs(
2141                        filter,
2142                        &sub.source,
2143                        &self.catalog,
2144                        outer_row,
2145                        outer_columns,
2146                    ));
2147                }
2148                let sub_plan = crate::planner::plan_statement(Statement::Query(sub))
2149                    .map_err(|e| e.to_string())?;
2150                let result = self.execute_plan(&sub_plan)?;
2151                let values = match result {
2152                    QueryResult::Rows { rows, .. } => rows
2153                        .into_iter()
2154                        .filter_map(|mut row| {
2155                            if row.is_empty() {
2156                                None
2157                            } else {
2158                                Some(value_to_expr(row.swap_remove(0)))
2159                            }
2160                        })
2161                        .collect(),
2162                    _ => Vec::new(),
2163                };
2164                Ok(Expr::InList {
2165                    expr: Box::new(inner),
2166                    list: values,
2167                    negated: *negated,
2168                })
2169            }
2170            Expr::ExistsSubquery { subquery, negated } => {
2171                let mut sub = *subquery.clone();
2172                if let Some(ref filter) = sub.filter {
2173                    sub.filter = Some(substitute_outer_refs(
2174                        filter,
2175                        &sub.source,
2176                        &self.catalog,
2177                        outer_row,
2178                        outer_columns,
2179                    ));
2180                }
2181                let sub_plan = crate::planner::plan_statement(Statement::Query(sub))
2182                    .map_err(|e| e.to_string())?;
2183                let result = self.execute_plan(&sub_plan)?;
2184                let has_rows = match result {
2185                    QueryResult::Rows { rows, .. } => !rows.is_empty(),
2186                    _ => false,
2187                };
2188                let truth = if *negated { !has_rows } else { has_rows };
2189                Ok(Expr::Literal(Literal::Bool(truth)))
2190            }
2191            Expr::BinaryOp(l, op, r) => {
2192                let l = self.materialize_correlated_for_row(l, outer_row, outer_columns)?;
2193                let r = self.materialize_correlated_for_row(r, outer_row, outer_columns)?;
2194                Ok(Expr::BinaryOp(Box::new(l), *op, Box::new(r)))
2195            }
2196            Expr::UnaryOp(op, inner) => {
2197                let inner = self.materialize_correlated_for_row(inner, outer_row, outer_columns)?;
2198                Ok(Expr::UnaryOp(*op, Box::new(inner)))
2199            }
2200            other => Ok(other.clone()),
2201        }
2202    }
2203
2204    pub fn execute_plan(&mut self, plan: &PlanNode) -> Result<QueryResult, String> {
2205        match plan {
2206            PlanNode::SeqScan { table } => {
2207                // Auto-refresh dirty materialized views on read.
2208                if self.view_registry.is_dirty(table) {
2209                    self.refresh_view(table)?;
2210                }
2211                let schema = self
2212                    .catalog
2213                    .schema(table)
2214                    .ok_or_else(|| format!("table '{table}' not found"))?
2215                    .clone();
2216                let columns: Vec<String> = schema.columns.iter().map(|c| c.name.clone()).collect();
2217                let rows: Vec<Vec<Value>> = self
2218                    .catalog
2219                    .scan(table)
2220                    .map_err(|e| e.to_string())?
2221                    .map(|(_, row)| row)
2222                    .collect();
2223                Ok(QueryResult::Rows { columns, rows })
2224            }
2225
2226            PlanNode::Filter { input, predicate } => {
2227                // Materialize any IN-subqueries in the predicate before the
2228                // scan loop — the closure can't call back into the engine.
2229                // Correlated subqueries are left in place for per-row eval.
2230                let materialized;
2231                let predicate = if contains_subquery(predicate) {
2232                    materialized = self.materialize_subqueries(predicate)?;
2233                    &materialized
2234                } else {
2235                    predicate
2236                };
2237
2238                // Correlated subquery path: per-row materialisation.
2239                if contains_subquery(predicate) {
2240                    let result = self.execute_plan(input)?;
2241                    return match result {
2242                        QueryResult::Rows { columns, rows } => {
2243                            let mut filtered = Vec::new();
2244                            for row in rows {
2245                                let row_pred =
2246                                    self.materialize_correlated_for_row(predicate, &row, &columns)?;
2247                                if eval_predicate(&row_pred, &row, &columns) {
2248                                    filtered.push(row);
2249                                }
2250                            }
2251                            Ok(QueryResult::Rows {
2252                                columns,
2253                                rows: filtered,
2254                            })
2255                        }
2256                        _ => Err("filter requires row input".into()),
2257                    };
2258                }
2259
2260                // Fast path: fuse Filter + SeqScan into a zero-copy streaming
2261                // loop. Uses decode_column() to evaluate the predicate on only
2262                // the columns it references, avoiding heap allocations for
2263                // String/Bytes columns that aren't part of the filter.
2264                if let PlanNode::SeqScan { table } = input.as_ref() {
2265                    // Auto-refresh dirty materialized views.
2266                    if self.view_registry.is_dirty(table) {
2267                        self.refresh_view(table)?;
2268                    }
2269                    let schema = self
2270                        .catalog
2271                        .schema(table)
2272                        .ok_or_else(|| format!("table '{table}' not found"))?
2273                        .clone();
2274                    let columns: Vec<String> =
2275                        schema.columns.iter().map(|c| c.name.clone()).collect();
2276                    let fast = FastLayout::new(&schema);
2277                    let row_layout = RowLayout::new(&schema);
2278                    // Mission F: pre-size to skip the first 4 Vec doublings
2279                    // (4 → 8 → 16 → 32 → 64). On a 100K-row scan with 30%
2280                    // selectivity that's ~4 fewer reallocations + memcpys.
2281                    let mut rows: Vec<Vec<Value>> = Vec::with_capacity(64);
2282
2283                    // Try compiled predicate for the filter check (handles
2284                    // int leaves, string-eq leaves, and And conjunctions).
2285                    if let Some(compiled) = compile_predicate(predicate, &columns, &fast, &schema) {
2286                        self.catalog
2287                            .for_each_row_raw(table, |_rid, data| {
2288                                if compiled(data) {
2289                                    rows.push(decode_row(&schema, data));
2290                                }
2291                            })
2292                            .map_err(|e| e.to_string())?;
2293                    } else {
2294                        let pred_cols = predicate_column_indices(predicate, &columns);
2295                        self.catalog
2296                            .for_each_row_raw(table, |_rid, data| {
2297                                let pred_row =
2298                                    decode_selective(&schema, &row_layout, data, &pred_cols);
2299                                if eval_predicate(predicate, &pred_row, &columns) {
2300                                    rows.push(decode_row(&schema, data));
2301                                }
2302                            })
2303                            .map_err(|e| e.to_string())?;
2304                    }
2305
2306                    return Ok(QueryResult::Rows { columns, rows });
2307                }
2308
2309                // General path: materialise then filter.
2310                let result = self.execute_plan(input)?;
2311                match result {
2312                    QueryResult::Rows { columns, rows } => {
2313                        let filtered: Vec<Vec<Value>> = rows
2314                            .into_iter()
2315                            .filter(|row| eval_predicate(predicate, row, &columns))
2316                            .collect();
2317                        Ok(QueryResult::Rows {
2318                            columns,
2319                            rows: filtered,
2320                        })
2321                    }
2322                    _ => Err("filter requires row input".into()),
2323                }
2324            }
2325
2326            PlanNode::Project { input, fields } => {
2327                // Fast path: Project over IndexScan — decode only projected
2328                // columns from raw bytes instead of full decode_row.
2329                if let PlanNode::IndexScan { table, column, key } = input.as_ref() {
2330                    let schema = self
2331                        .catalog
2332                        .schema(table)
2333                        .ok_or_else(|| format!("table '{table}' not found"))?
2334                        .clone();
2335                    let all_columns: Vec<String> =
2336                        schema.columns.iter().map(|c| c.name.clone()).collect();
2337                    let key_value = literal_to_value(key)?;
2338                    let tbl = self
2339                        .catalog
2340                        .get_table(table)
2341                        .ok_or_else(|| format!("table '{table}' not found"))?;
2342
2343                    let proj_columns: Vec<String> = fields
2344                        .iter()
2345                        .map(|f| {
2346                            f.alias.clone().unwrap_or_else(|| match &f.expr {
2347                                Expr::Field(name) => name.clone(),
2348                                _ => "?".into(),
2349                            })
2350                        })
2351                        .collect();
2352
2353                    // Determine which column indices the projection needs
2354                    let proj_indices: Vec<usize> = fields
2355                        .iter()
2356                        .filter_map(|f| {
2357                            if let Expr::Field(name) = &f.expr {
2358                                all_columns.iter().position(|c| c == name)
2359                            } else {
2360                                None
2361                            }
2362                        })
2363                        .collect();
2364
2365                    if let Some(btree) = tbl.index(column) {
2366                        let layout = RowLayout::new(&schema);
2367                        // Mission D7: int-specialized lookup skips the
2368                        // `<Value as Ord>::cmp` discriminant dispatch on
2369                        // int-keyed indexes (the vast majority).
2370                        let lookup_result = match &key_value {
2371                            Value::Int(k) => btree.lookup_int(*k),
2372                            other => btree.lookup(other),
2373                        };
2374                        let rows = match lookup_result {
2375                            Some(rid) => match tbl.heap.get(rid) {
2376                                Some(data) => {
2377                                    let row: Vec<Value> = proj_indices
2378                                        .iter()
2379                                        .map(|&ci| decode_column(&schema, &layout, &data, ci))
2380                                        .collect();
2381                                    vec![row]
2382                                }
2383                                None => Vec::new(),
2384                            },
2385                            None => Vec::new(),
2386                        };
2387                        return Ok(QueryResult::Rows {
2388                            columns: proj_columns,
2389                            rows,
2390                        });
2391                    }
2392                }
2393
2394                // Fast path: Project(Limit(Sort(Filter(SeqScan)))) — bounded
2395                // top-N heap. Decodes only the sort key + projected columns,
2396                // keeps at most `limit` rows in a heap. Also handles the
2397                // Project(Limit(Sort(SeqScan))) variant (no filter).
2398                if let PlanNode::Limit {
2399                    input: inner,
2400                    count: limit_expr,
2401                } = input.as_ref()
2402                {
2403                    if let PlanNode::Sort {
2404                        input: sort_input,
2405                        keys,
2406                    } = inner.as_ref()
2407                    {
2408                        // Fast path only for single-key sorts
2409                        if keys.len() == 1 {
2410                            let sort_field = &keys[0].field;
2411                            let descending = keys[0].descending;
2412                            let limit = match limit_expr {
2413                                Expr::Literal(Literal::Int(v)) if *v >= 0 => *v as usize,
2414                                _ => usize::MAX,
2415                            };
2416                            let (table_opt, pred_opt): (Option<&str>, Option<&Expr>) =
2417                                match sort_input.as_ref() {
2418                                    PlanNode::SeqScan { table } => (Some(table.as_str()), None),
2419                                    PlanNode::Filter {
2420                                        input: fi,
2421                                        predicate,
2422                                    } => {
2423                                        if let PlanNode::SeqScan { table } = fi.as_ref() {
2424                                            (Some(table.as_str()), Some(predicate))
2425                                        } else {
2426                                            (None, None)
2427                                        }
2428                                    }
2429                                    _ => (None, None),
2430                                };
2431                            if let Some(table) = table_opt {
2432                                if let Some(result) = self.project_filter_sort_limit_fast(
2433                                    table, fields, sort_field, descending, limit, pred_opt,
2434                                )? {
2435                                    return Ok(result);
2436                                }
2437                            }
2438                        }
2439                    }
2440                    // Fast path: Project(Limit(Filter(SeqScan))) — stream,
2441                    // decode only projected columns, stop at limit.
2442                    if let PlanNode::Filter {
2443                        input: fi,
2444                        predicate,
2445                    } = inner.as_ref()
2446                    {
2447                        if let PlanNode::SeqScan { table } = fi.as_ref() {
2448                            let limit = match limit_expr {
2449                                Expr::Literal(Literal::Int(v)) if *v >= 0 => *v as usize,
2450                                _ => usize::MAX,
2451                            };
2452                            if let Some(result) = self.project_filter_limit_fast(
2453                                table,
2454                                fields,
2455                                limit,
2456                                Some(predicate),
2457                            )? {
2458                                return Ok(result);
2459                            }
2460                        }
2461                    }
2462                    // Fast path: Project(Limit(SeqScan)) — stream, no filter.
2463                    if let PlanNode::SeqScan { table } = inner.as_ref() {
2464                        let limit = match limit_expr {
2465                            Expr::Literal(Literal::Int(v)) if *v >= 0 => *v as usize,
2466                            _ => usize::MAX,
2467                        };
2468                        if let Some(result) =
2469                            self.project_filter_limit_fast(table, fields, limit, None)?
2470                        {
2471                            return Ok(result);
2472                        }
2473                    }
2474                }
2475
2476                // Mission D4: Project(Filter(SeqScan)) without Limit. Reuses
2477                // `project_filter_limit_fast` with limit = usize::MAX so the
2478                // hot loop decodes only projected columns and uses the
2479                // compiled predicate. Previously this fell through to the
2480                // generic Filter branch which materialised every column via
2481                // `decode_row` then re-projected — quadratic work.
2482                //
2483                // multi_col_and_filter (`U filter .age > 30 and .status =
2484                // "active" { .name, .age }`) was 6.18ms (0.7x SQLite) and
2485                // is the load-bearing workload for this fast path.
2486                if let PlanNode::Filter {
2487                    input: fi,
2488                    predicate,
2489                } = input.as_ref()
2490                {
2491                    if let PlanNode::SeqScan { table } = fi.as_ref() {
2492                        if let Some(result) = self.project_filter_limit_fast(
2493                            table,
2494                            fields,
2495                            usize::MAX,
2496                            Some(predicate),
2497                        )? {
2498                            return Ok(result);
2499                        }
2500                    }
2501                }
2502
2503                // Mission D4: Project(SeqScan) without Filter or Limit.
2504                // Decode only projected columns; the previous fall-through
2505                // built full Vec<Value> rows then re-projected.
2506                if let PlanNode::SeqScan { table } = input.as_ref() {
2507                    if let Some(result) =
2508                        self.project_filter_limit_fast(table, fields, usize::MAX, None)?
2509                    {
2510                        return Ok(result);
2511                    }
2512                }
2513
2514                let result = self.execute_plan(input)?;
2515                match result {
2516                    QueryResult::Rows { columns, rows } => {
2517                        let proj_columns: Vec<String> = fields
2518                            .iter()
2519                            .map(|f| {
2520                                f.alias.clone().unwrap_or_else(|| match &f.expr {
2521                                    Expr::Field(name) => name.clone(),
2522                                    // Mission E1.2: `{ u.name }` projects as the
2523                                    // qualified column name so callers can still
2524                                    // disambiguate across the join output.
2525                                    Expr::QualifiedField { qualifier, field } => {
2526                                        format!("{qualifier}.{field}")
2527                                    }
2528                                    _ => "?".into(),
2529                                })
2530                            })
2531                            .collect();
2532                        let proj_rows: Vec<Vec<Value>> = rows
2533                            .iter()
2534                            .map(|row| {
2535                                fields
2536                                    .iter()
2537                                    .map(|f| eval_expr(&f.expr, row, &columns))
2538                                    .collect()
2539                            })
2540                            .collect();
2541                        Ok(QueryResult::Rows {
2542                            columns: proj_columns,
2543                            rows: proj_rows,
2544                        })
2545                    }
2546                    _ => Err("project requires row input".into()),
2547                }
2548            }
2549
2550            PlanNode::Sort { input, keys } => {
2551                let result = self.execute_plan(input)?;
2552                match result {
2553                    QueryResult::Rows { columns, mut rows } => {
2554                        let key_indices: Vec<(usize, bool)> = keys
2555                            .iter()
2556                            .map(|k| {
2557                                columns
2558                                    .iter()
2559                                    .position(|c| c == &k.field)
2560                                    .map(|idx| (idx, k.descending))
2561                                    .ok_or_else(|| format!("column '{}' not found", k.field))
2562                            })
2563                            .collect::<Result<_, String>>()?;
2564                        rows.sort_by(|a, b| {
2565                            for &(col_idx, descending) in &key_indices {
2566                                let cmp = a[col_idx].cmp(&b[col_idx]);
2567                                let cmp = if descending { cmp.reverse() } else { cmp };
2568                                if cmp != std::cmp::Ordering::Equal {
2569                                    return cmp;
2570                                }
2571                            }
2572                            std::cmp::Ordering::Equal
2573                        });
2574                        Ok(QueryResult::Rows { columns, rows })
2575                    }
2576                    _ => Err("sort requires row input".into()),
2577                }
2578            }
2579
2580            PlanNode::Limit { input, count } => {
2581                let result = self.execute_plan(input)?;
2582                let n = match count {
2583                    Expr::Literal(Literal::Int(v)) => *v as usize,
2584                    _ => return Err("limit must be integer literal".into()),
2585                };
2586                match result {
2587                    QueryResult::Rows { columns, rows } => Ok(QueryResult::Rows {
2588                        columns,
2589                        rows: rows.into_iter().take(n).collect(),
2590                    }),
2591                    _ => Err("limit requires row input".into()),
2592                }
2593            }
2594
2595            PlanNode::Offset { input, count } => {
2596                let result = self.execute_plan(input)?;
2597                let n = match count {
2598                    Expr::Literal(Literal::Int(v)) => *v as usize,
2599                    _ => return Err("offset must be integer literal".into()),
2600                };
2601                match result {
2602                    QueryResult::Rows { columns, rows } => Ok(QueryResult::Rows {
2603                        columns,
2604                        rows: rows.into_iter().skip(n).collect(),
2605                    }),
2606                    _ => Err("offset requires row input".into()),
2607                }
2608            }
2609
2610            PlanNode::Aggregate {
2611                input,
2612                function,
2613                field,
2614            } => {
2615                // Fast path: count() over SeqScan — count rows without any decode
2616                if *function == AggFunc::Count {
2617                    if let PlanNode::SeqScan { table } = input.as_ref() {
2618                        let mut count: i64 = 0;
2619                        self.catalog
2620                            .for_each_row_raw(table, |_rid, _data| {
2621                                count += 1;
2622                            })
2623                            .map_err(|e| e.to_string())?;
2624                        return Ok(QueryResult::Scalar(Value::Int(count)));
2625                    }
2626                    // Fast path: count() over Filter(SeqScan) — try compiled
2627                    // predicate first, fall back to decode_column path.
2628                    if let PlanNode::Filter {
2629                        input: inner,
2630                        predicate,
2631                    } = input.as_ref()
2632                    {
2633                        if let PlanNode::SeqScan { table } = inner.as_ref() {
2634                            let schema = self
2635                                .catalog
2636                                .schema(table)
2637                                .ok_or_else(|| format!("table '{table}' not found"))?
2638                                .clone();
2639                            let columns: Vec<String> =
2640                                schema.columns.iter().map(|c| c.name.clone()).collect();
2641                            let fast = FastLayout::new(&schema);
2642                            let row_layout = RowLayout::new(&schema);
2643
2644                            // Try compiled predicate (zero-allocation hot path).
2645                            // Handles int leaves, string-eq leaves, AND conjunctions.
2646                            if let Some(compiled) =
2647                                compile_predicate(predicate, &columns, &fast, &schema)
2648                            {
2649                                let mut count: i64 = 0;
2650                                self.catalog
2651                                    .for_each_row_raw(table, |_rid, data| {
2652                                        if compiled(data) {
2653                                            count += 1;
2654                                        }
2655                                    })
2656                                    .map_err(|e| e.to_string())?;
2657                                return Ok(QueryResult::Scalar(Value::Int(count)));
2658                            }
2659
2660                            // Fallback: decode predicate columns
2661                            let pred_cols = predicate_column_indices(predicate, &columns);
2662                            let mut count: i64 = 0;
2663                            self.catalog
2664                                .for_each_row_raw(table, |_rid, data| {
2665                                    let pred_row =
2666                                        decode_selective(&schema, &row_layout, data, &pred_cols);
2667                                    if eval_predicate(predicate, &pred_row, &columns) {
2668                                        count += 1;
2669                                    }
2670                                })
2671                                .map_err(|e| e.to_string())?;
2672
2673                            return Ok(QueryResult::Scalar(Value::Int(count)));
2674                        }
2675                    }
2676                }
2677
2678                // Fast path: sum/avg/min/max over a single fixed-size int
2679                // column with an optional compiled filter predicate. Walks
2680                // raw row bytes, zero allocation per row.
2681                if matches!(
2682                    function,
2683                    AggFunc::Sum
2684                        | AggFunc::Avg
2685                        | AggFunc::Min
2686                        | AggFunc::Max
2687                        | AggFunc::CountDistinct
2688                ) {
2689                    if let Some(col) = field.as_ref() {
2690                        // Shape: Aggregate(SeqScan) or Aggregate(Filter(SeqScan))
2691                        let (table_opt, pred_opt): (Option<&str>, Option<&Expr>) =
2692                            match input.as_ref() {
2693                                PlanNode::SeqScan { table } => (Some(table.as_str()), None),
2694                                PlanNode::Filter {
2695                                    input: inner,
2696                                    predicate,
2697                                } => {
2698                                    if let PlanNode::SeqScan { table } = inner.as_ref() {
2699                                        (Some(table.as_str()), Some(predicate))
2700                                    } else {
2701                                        (None, None)
2702                                    }
2703                                }
2704                                _ => (None, None),
2705                            };
2706                        if let Some(table) = table_opt {
2707                            if let Some(result) =
2708                                self.agg_single_col_fast(table, col, *function, pred_opt)?
2709                            {
2710                                return Ok(result);
2711                            }
2712                        }
2713                    }
2714                }
2715
2716                // Fast path: Project(Limit(Filter(SeqScan))) — stream, decode
2717                // only projected columns, stop once we hit the limit.
2718                // (Handled in the Project branch; this branch only fires when
2719                // the aggregate is the outer node.)
2720                let result = self.execute_plan(input)?;
2721                match result {
2722                    QueryResult::Rows { columns, rows } => {
2723                        match function {
2724                            AggFunc::Count => {
2725                                Ok(QueryResult::Scalar(Value::Int(rows.len() as i64)))
2726                            }
2727                            AggFunc::CountDistinct => {
2728                                let col = field.as_ref().ok_or("count distinct requires field")?;
2729                                let idx = columns
2730                                    .iter()
2731                                    .position(|c| c == col)
2732                                    .ok_or("col not found")?;
2733                                let mut seen = std::collections::HashSet::new();
2734                                for row in &rows {
2735                                    let v = &row[idx];
2736                                    if !v.is_empty() {
2737                                        seen.insert(v.clone());
2738                                    }
2739                                }
2740                                Ok(QueryResult::Scalar(Value::Int(seen.len() as i64)))
2741                            }
2742                            AggFunc::Avg => {
2743                                let col = field.as_ref().ok_or("avg requires field")?;
2744                                let idx = columns
2745                                    .iter()
2746                                    .position(|c| c == col)
2747                                    .ok_or("col not found")?;
2748                                let sum: f64 = rows
2749                                    .iter()
2750                                    .filter_map(|r| match &r[idx] {
2751                                        Value::Int(v) => Some(*v as f64),
2752                                        Value::Float(v) => Some(*v),
2753                                        _ => None,
2754                                    })
2755                                    .sum();
2756                                let count = rows.len() as f64;
2757                                Ok(QueryResult::Scalar(Value::Float(sum / count)))
2758                            }
2759                            AggFunc::Sum => {
2760                                let col = field.as_ref().ok_or("sum requires field")?;
2761                                let idx = columns
2762                                    .iter()
2763                                    .position(|c| c == col)
2764                                    .ok_or("col not found")?;
2765                                // Track int and float contributions separately so
2766                                // Float columns (and mixed Int/Float rows) don't get
2767                                // silently dropped as they did in the Int-only
2768                                // version. If any Float is present, the whole sum
2769                                // promotes to Float — matching Avg's semantics.
2770                                let mut int_sum: i64 = 0;
2771                                let mut float_sum: f64 = 0.0;
2772                                let mut saw_float = false;
2773                                for r in &rows {
2774                                    match &r[idx] {
2775                                        Value::Int(v) => int_sum += *v,
2776                                        Value::Float(v) => {
2777                                            float_sum += *v;
2778                                            saw_float = true;
2779                                        }
2780                                        _ => {}
2781                                    }
2782                                }
2783                                let result = if saw_float {
2784                                    Value::Float(float_sum + int_sum as f64)
2785                                } else {
2786                                    Value::Int(int_sum)
2787                                };
2788                                Ok(QueryResult::Scalar(result))
2789                            }
2790                            AggFunc::Min | AggFunc::Max => {
2791                                let col = field.as_ref().ok_or("min/max requires field")?;
2792                                let idx = columns
2793                                    .iter()
2794                                    .position(|c| c == col)
2795                                    .ok_or("col not found")?;
2796                                let vals: Vec<&Value> = rows.iter().map(|r| &r[idx]).collect();
2797                                let result = if *function == AggFunc::Min {
2798                                    vals.into_iter().min().cloned()
2799                                } else {
2800                                    vals.into_iter().max().cloned()
2801                                };
2802                                Ok(QueryResult::Scalar(result.unwrap_or(Value::Empty)))
2803                            }
2804                        }
2805                    }
2806                    _ => Err("aggregate requires row input".into()),
2807                }
2808            }
2809
2810            PlanNode::Insert { table, assignments } => {
2811                // Mission C Phase 3: resolve column indices + literals under
2812                // a short-lived shared borrow on the catalog, then release
2813                // it before calling insert(). The previous code cloned the
2814                // full Schema (6+ String allocations on User) just to dodge
2815                // the borrow checker — a measurable 200-400ns on every
2816                // insert_single call in the bench.
2817                let values = {
2818                    let schema = self
2819                        .catalog
2820                        .schema(table)
2821                        .ok_or_else(|| format!("table '{table}' not found"))?;
2822                    let mut values = vec![Value::Empty; schema.columns.len()];
2823                    for a in assignments {
2824                        let idx = schema
2825                            .column_index(&a.field)
2826                            .ok_or_else(|| format!("column '{}' not found", a.field))?;
2827                        values[idx] = literal_to_value(&a.value)?;
2828                    }
2829                    values
2830                };
2831                self.catalog
2832                    .insert(table, &values)
2833                    .map_err(|e| e.to_string())?;
2834                self.view_registry.mark_dependents_dirty(table);
2835                Ok(QueryResult::Modified(1))
2836            }
2837
2838            PlanNode::Upsert {
2839                table,
2840                key_column,
2841                assignments,
2842                on_conflict,
2843            } => {
2844                // Build the insert values from assignments.
2845                let (values, key_idx) = {
2846                    let schema = self
2847                        .catalog
2848                        .schema(table)
2849                        .ok_or_else(|| format!("table '{table}' not found"))?;
2850                    let mut values = vec![Value::Empty; schema.columns.len()];
2851                    for a in assignments {
2852                        let idx = schema
2853                            .column_index(&a.field)
2854                            .ok_or_else(|| format!("column '{}' not found", a.field))?;
2855                        values[idx] = literal_to_value(&a.value)?;
2856                    }
2857                    let key_idx = schema
2858                        .column_index(key_column)
2859                        .ok_or_else(|| format!("key column '{key_column}' not found"))?;
2860                    (values, key_idx)
2861                };
2862
2863                let key_value = values[key_idx].clone();
2864
2865                // Probe the index for a conflict.
2866                let existing = {
2867                    let tbl = self
2868                        .catalog
2869                        .get_table(table)
2870                        .ok_or_else(|| format!("table '{table}' not found"))?;
2871                    if let Some(btree) = tbl.index(key_column) {
2872                        let hit = match &key_value {
2873                            Value::Int(k) => btree.lookup_int(*k),
2874                            other => btree.lookup(other),
2875                        };
2876                        hit.and_then(|rid| {
2877                            tbl.heap
2878                                .get(rid)
2879                                .map(|data| (rid, decode_row(&tbl.schema, &data)))
2880                        })
2881                    } else {
2882                        // No index — linear scan for the key.
2883                        let mut found = None;
2884                        for (rid, row) in tbl.scan() {
2885                            if row[key_idx] == key_value {
2886                                found = Some((rid, row));
2887                                break;
2888                            }
2889                        }
2890                        found
2891                    }
2892                };
2893
2894                if let Some((rid, mut existing_row)) = existing {
2895                    // Conflict: apply on_conflict assignments (or all non-key if empty).
2896                    let update_assignments = if on_conflict.is_empty() {
2897                        assignments
2898                    } else {
2899                        on_conflict
2900                    };
2901                    let changed_cols: Vec<usize> = {
2902                        let schema = self
2903                            .catalog
2904                            .schema(table)
2905                            .ok_or_else(|| format!("table '{table}' not found"))?;
2906                        let mut indices = Vec::new();
2907                        for a in update_assignments {
2908                            let idx = schema
2909                                .column_index(&a.field)
2910                                .ok_or_else(|| format!("column '{}' not found", a.field))?;
2911                            if idx != key_idx {
2912                                existing_row[idx] = literal_to_value(&a.value)?;
2913                                indices.push(idx);
2914                            }
2915                        }
2916                        indices
2917                    };
2918                    self.catalog
2919                        .update_hinted(table, rid, &existing_row, Some(&changed_cols))
2920                        .map_err(|e| e.to_string())?;
2921                    self.view_registry.mark_dependents_dirty(table);
2922                    Ok(QueryResult::Modified(1))
2923                } else {
2924                    // No conflict: insert.
2925                    self.catalog
2926                        .insert(table, &values)
2927                        .map_err(|e| e.to_string())?;
2928                    self.view_registry.mark_dependents_dirty(table);
2929                    Ok(QueryResult::Modified(1))
2930                }
2931            }
2932
2933            PlanNode::Update {
2934                input,
2935                table,
2936                assignments,
2937            } => {
2938                // Mission C Phase 3: resolve assignments against a borrowed
2939                // schema, then drop the borrow before the mutation loop.
2940                // Try literal-only path first; fall back to per-row expression
2941                // evaluation if any assignment contains a non-literal expression
2942                // (e.g., `age := .age + 1`).
2943                let (col_indices, literal_vals): (Vec<usize>, Option<Vec<Value>>) = {
2944                    let schema_ref = self
2945                        .catalog
2946                        .schema(table)
2947                        .ok_or_else(|| format!("table '{table}' not found"))?;
2948                    let indices: Vec<usize> = assignments
2949                        .iter()
2950                        .map(|a| {
2951                            schema_ref
2952                                .column_index(&a.field)
2953                                .ok_or_else(|| format!("column '{}' not found", a.field))
2954                        })
2955                        .collect::<Result<_, _>>()?;
2956                    let vals: Result<Vec<Value>, _> = assignments
2957                        .iter()
2958                        .map(|a| literal_to_value(&a.value))
2959                        .collect();
2960                    (indices, vals.ok())
2961                };
2962                let resolved_assignments: Option<Vec<(usize, Value)>> =
2963                    literal_vals.map(|vals| col_indices.iter().copied().zip(vals).collect());
2964
2965                // Mission C Phase 2: the hint Table::update_hinted needs to
2966                // decide whether to read the old row for index diff.
2967                let changed_cols: Vec<usize> = col_indices.clone();
2968
2969                // ── Fused scan+update for Update(Filter(SeqScan)) ────────
2970                // Perf sprint: instead of the two-pass collect-RIDs-then-loop
2971                // pattern (which pays one ensure_hot per matched row on the
2972                // second pass), fuse the predicate evaluation and in-place
2973                // byte-level mutation into a single heap walk. Same idea as
2974                // the fused scan_delete_matching path for deletes.
2975                if let Some(ref resolved_assignments) = resolved_assignments {
2976                    if let PlanNode::Filter {
2977                        input: inner,
2978                        predicate,
2979                    } = input.as_ref()
2980                    {
2981                        if let PlanNode::SeqScan { table: t } = inner.as_ref() {
2982                            if t == table {
2983                                let fused_result = self.try_fused_scan_update(
2984                                    table,
2985                                    predicate,
2986                                    resolved_assignments,
2987                                    &changed_cols,
2988                                );
2989                                if let Some(result) = fused_result {
2990                                    return result;
2991                                }
2992                            }
2993                        }
2994                    }
2995                }
2996
2997                // Collect matching RowIds in a single pass.
2998                let matching_rids = self.collect_rids_for_mutation(input, table)?;
2999
3000                // ── Literal-only fast paths ─────────────────────────────
3001                if let Some(ref resolved_assignments) = resolved_assignments {
3002                    // Mission C Phase 4: in-place byte-patch fast path. If every
3003                    // assignment targets a fixed-size non-null column AND none of
3004                    // them is indexed, we can skip decode_row / Vec<Value> /
3005                    // encode_row_into entirely and patch the row's raw bytes on
3006                    // the hot page.
3007                    let fast_patch: Option<Vec<FastPatch>> = {
3008                        let tbl = self
3009                            .catalog
3010                            .get_table(table)
3011                            .ok_or_else(|| format!("table '{table}' not found"))?;
3012                        let schema = &tbl.schema;
3013                        let all_fixed_nonnull = resolved_assignments.iter().all(|(idx, val)| {
3014                            is_fixed_size(schema.columns[*idx].type_id) && !val.is_empty()
3015                        });
3016                        let no_indexed = !resolved_assignments
3017                            .iter()
3018                            .any(|(idx, _)| tbl.has_indexed_col(*idx));
3019
3020                        if all_fixed_nonnull && no_indexed {
3021                            let layout = RowLayout::new(schema);
3022                            let bitmap_size = layout.bitmap_size();
3023                            let patches: Vec<FastPatch> = resolved_assignments
3024                                .iter()
3025                                .map(|(idx, val)| {
3026                                    let fixed_off = layout
3027                                        .fixed_offset(*idx)
3028                                        .expect("is_fixed_size already checked");
3029                                    let field_off = 2 + bitmap_size + fixed_off;
3030                                    let bytes: FixedBytes = match val {
3031                                        Value::Int(v) => FixedBytes::I64(v.to_le_bytes()),
3032                                        Value::Float(v) => FixedBytes::F64(v.to_le_bytes()),
3033                                        Value::Bool(v) => FixedBytes::Bool(if *v { 1 } else { 0 }),
3034                                        Value::DateTime(v) => FixedBytes::I64(v.to_le_bytes()),
3035                                        Value::Uuid(v) => FixedBytes::Uuid(*v),
3036                                        _ => unreachable!("all_fixed_nonnull guard lied"),
3037                                    };
3038                                    FastPatch {
3039                                        field_off,
3040                                        bitmap_byte_off: 2 + idx / 8,
3041                                        bit_mask: 1u8 << (idx % 8),
3042                                        bytes,
3043                                    }
3044                                })
3045                                .collect();
3046                            Some(patches)
3047                        } else {
3048                            None
3049                        }
3050                    };
3051
3052                    if let Some(patches) = fast_patch {
3053                        let mut count = 0u64;
3054                        for rid in matching_rids {
3055                            // Mission B2: WAL-log every patch so crash
3056                            // recovery replays the update. Same mutation
3057                            // closure as before — the wrapper just sandwiches
3058                            // it between a hot-page read and a WAL append.
3059                            let ok = self
3060                                .catalog
3061                                .update_row_bytes_logged(table, rid, |row| {
3062                                    for p in &patches {
3063                                        row[p.bitmap_byte_off] &= !p.bit_mask;
3064                                        let field_bytes = p.bytes.as_slice();
3065                                        row[p.field_off..p.field_off + field_bytes.len()]
3066                                            .copy_from_slice(field_bytes);
3067                                    }
3068                                })
3069                                .map_err(|e| e.to_string())?;
3070                            if ok {
3071                                count += 1;
3072                            }
3073                        }
3074                        self.view_registry.mark_dependents_dirty(table);
3075                        return Ok(QueryResult::Modified(count));
3076                    }
3077
3078                    // Mission C Phase 10: var-column in-place shrink fast path.
3079                    let var_fast: Option<(usize, Option<Vec<u8>>)> = {
3080                        let tbl = self
3081                            .catalog
3082                            .get_table(table)
3083                            .ok_or_else(|| format!("table '{table}' not found"))?;
3084                        let schema = &tbl.schema;
3085                        let is_single = resolved_assignments.len() == 1;
3086                        let is_var_col = is_single
3087                            && !is_fixed_size(schema.columns[resolved_assignments[0].0].type_id);
3088                        let no_indexed = !resolved_assignments
3089                            .iter()
3090                            .any(|(idx, _)| tbl.has_indexed_col(*idx));
3091
3092                        if is_single && is_var_col && no_indexed {
3093                            let (idx, val) = &resolved_assignments[0];
3094                            let bytes_opt: Option<Vec<u8>> = match val {
3095                                Value::Str(s) => Some(s.as_bytes().to_vec()),
3096                                Value::Bytes(b) => Some(b.clone()),
3097                                Value::Empty => None,
3098                                _ => {
3099                                    return Err(format!(
3100                                "type mismatch: cannot assign non-var value to var column '{}'",
3101                                schema.columns[*idx].name
3102                            ))
3103                                }
3104                            };
3105                            Some((*idx, bytes_opt))
3106                        } else {
3107                            None
3108                        }
3109                    };
3110
3111                    if let Some((col_idx, new_bytes_opt)) = var_fast {
3112                        let new_bytes_ref: Option<&[u8]> = new_bytes_opt.as_deref();
3113                        let mut count = 0u64;
3114                        let mut fallback_rids: Vec<RowId> = Vec::new();
3115                        for rid in &matching_rids {
3116                            // Mission B2: logged variant so crash recovery
3117                            // replays the shrink. On a false return (row
3118                            // would have to grow), the rid is pushed to
3119                            // `fallback_rids` and the slower `update_hinted`
3120                            // path — which is already WAL-logged — picks it up.
3121                            let ok = self
3122                                .catalog
3123                                .patch_var_col_logged(table, *rid, col_idx, new_bytes_ref)
3124                                .map_err(|e| e.to_string())?;
3125                            if ok {
3126                                count += 1;
3127                            } else {
3128                                fallback_rids.push(*rid);
3129                            }
3130                        }
3131                        for rid in fallback_rids {
3132                            let mut row = match self.catalog.get(table, rid) {
3133                                Some(r) => r,
3134                                None => continue,
3135                            };
3136                            for (idx, val) in resolved_assignments.iter() {
3137                                row[*idx] = val.clone();
3138                            }
3139                            self.catalog
3140                                .update_hinted(table, rid, &row, Some(&changed_cols))
3141                                .map_err(|e| e.to_string())?;
3142                            count += 1;
3143                        }
3144                        self.view_registry.mark_dependents_dirty(table);
3145                        return Ok(QueryResult::Modified(count));
3146                    }
3147
3148                    // Generic literal path: decode row, apply literal values.
3149                    let mut count = 0u64;
3150                    for rid in matching_rids {
3151                        let mut row = match self.catalog.get(table, rid) {
3152                            Some(r) => r,
3153                            None => continue,
3154                        };
3155                        for (idx, val) in resolved_assignments.iter() {
3156                            row[*idx] = val.clone();
3157                        }
3158                        self.catalog
3159                            .update_hinted(table, rid, &row, Some(&changed_cols))
3160                            .map_err(|e| e.to_string())?;
3161                        count += 1;
3162                    }
3163                    self.view_registry.mark_dependents_dirty(table);
3164                    return Ok(QueryResult::Modified(count));
3165                } // end if let Some(resolved_assignments)
3166
3167                // ── Expression-based update path ────────────────────────
3168                // At least one assignment contains a non-literal expression
3169                // (e.g., `age := .age + 1`). Evaluate per-row.
3170                let col_names: Vec<String> = {
3171                    let schema_ref = self
3172                        .catalog
3173                        .schema(table)
3174                        .ok_or_else(|| format!("table '{table}' not found"))?;
3175                    schema_ref.columns.iter().map(|c| c.name.clone()).collect()
3176                };
3177                let mut count = 0u64;
3178                for rid in matching_rids {
3179                    let mut row = match self.catalog.get(table, rid) {
3180                        Some(r) => r,
3181                        None => continue,
3182                    };
3183                    for (i, asgn) in assignments.iter().enumerate() {
3184                        let val = eval_expr(&asgn.value, &row, &col_names);
3185                        row[col_indices[i]] = val;
3186                    }
3187                    self.catalog
3188                        .update_hinted(table, rid, &row, Some(&changed_cols))
3189                        .map_err(|e| e.to_string())?;
3190                    count += 1;
3191                }
3192                self.view_registry.mark_dependents_dirty(table);
3193                Ok(QueryResult::Modified(count))
3194            }
3195
3196            PlanNode::Delete { input, table } => {
3197                // Mission C Phase 3: no schema clone — collect_rids_for_mutation
3198                // looks up schema internally when it needs one, and the mutation
3199                // loop doesn't need the schema at all.
3200                //
3201                // Mission C Phase 12: route bulk deletes through
3202                // `Catalog::delete_many`, which batches the btree leaf
3203                // compaction and shares one `ensure_hot` per row between
3204                // the index-key extraction and the slot delete. On
3205                // `delete_by_filter` (100K fixture, ~20K matches) that
3206                // removes ~4ms of pure `Vec::remove` memmove from the btree
3207                // maintenance phase.
3208                //
3209                // Mission C Phase 16: for the common `delete where ...`
3210                // shape (Filter(SeqScan)) — and the rarer "delete
3211                // everything" shape (SeqScan) — skip the two-pass
3212                // `collect_rids_for_mutation` + `delete_many` flow entirely.
3213                // The fused `scan_delete_matching` primitive walks the
3214                // heap exactly once, paying one `ensure_hot` per page
3215                // instead of per-row. That closes the last major gap on
3216                // the bench's `delete_by_filter` workload.
3217                if let PlanNode::Filter {
3218                    input: inner,
3219                    predicate,
3220                } = input.as_ref()
3221                {
3222                    if let PlanNode::SeqScan { table: t } = inner.as_ref() {
3223                        if t == table {
3224                            let schema = self
3225                                .catalog
3226                                .schema(table)
3227                                .ok_or_else(|| format!("table '{table}' not found"))?;
3228                            let columns: Vec<String> =
3229                                schema.columns.iter().map(|c| c.name.clone()).collect();
3230                            let fast = FastLayout::new(schema);
3231                            if let Some(compiled) =
3232                                compile_predicate(predicate, &columns, &fast, schema)
3233                            {
3234                                // Mission B2: logged variant so every
3235                                // matched rid hits the WAL during the
3236                                // single-pass scan. Structure of the
3237                                // fused scan is unchanged — only the
3238                                // hook closure now also appends.
3239                                let count = self
3240                                    .catalog
3241                                    .scan_delete_matching_logged(table, |data| compiled(data))
3242                                    .map_err(|e| e.to_string())?;
3243                                self.view_registry.mark_dependents_dirty(table);
3244                                return Ok(QueryResult::Modified(count));
3245                            }
3246                        }
3247                    }
3248                } else if let PlanNode::SeqScan { table: t } = input.as_ref() {
3249                    if t == table {
3250                        // `delete from T` with no predicate — every live
3251                        // row matches. One pass is still the right shape.
3252                        // Mission B2: logged variant — see above.
3253                        let count = self
3254                            .catalog
3255                            .scan_delete_matching_logged(table, |_| true)
3256                            .map_err(|e| e.to_string())?;
3257                        self.view_registry.mark_dependents_dirty(table);
3258                        return Ok(QueryResult::Modified(count));
3259                    }
3260                }
3261
3262                let matching_rids = self.collect_rids_for_mutation(input, table)?;
3263                let count = self
3264                    .catalog
3265                    .delete_many(table, &matching_rids)
3266                    .map_err(|e| e.to_string())?;
3267                self.view_registry.mark_dependents_dirty(table);
3268                Ok(QueryResult::Modified(count))
3269            }
3270
3271            PlanNode::AliasScan { table, alias } => {
3272                // Mission E1.2: scan `table` and rename every output column
3273                // to `alias.field`. Used as a join leaf so downstream
3274                // NestedLoopJoin + Filter + Project nodes can resolve
3275                // `Expr::QualifiedField` lookups by direct column-name match.
3276                //
3277                // We don't bother with a fused zero-copy loop here yet — the
3278                // whole join path is nested-loop and correctness-first
3279                // (Phase E1.3 will introduce hash join and at that point we
3280                // can revisit whether to specialise AliasScan).
3281                let schema = self
3282                    .catalog
3283                    .schema(table)
3284                    .ok_or_else(|| format!("table '{table}' not found"))?
3285                    .clone();
3286                let columns: Vec<String> = schema
3287                    .columns
3288                    .iter()
3289                    .map(|c| format!("{alias}.{}", c.name))
3290                    .collect();
3291                let rows: Vec<Vec<Value>> = self
3292                    .catalog
3293                    .scan(table)
3294                    .map_err(|e| e.to_string())?
3295                    .map(|(_, row)| row)
3296                    .collect();
3297                Ok(QueryResult::Rows { columns, rows })
3298            }
3299
3300            PlanNode::NestedLoopJoin {
3301                left,
3302                right,
3303                on,
3304                kind,
3305            } => {
3306                // Materialise both sides. The executor ships two strategies:
3307                //   1. Hash join (E1.3) — when the `on` predicate is a
3308                //      simple equi-predicate `left_col = right_col`, build a
3309                //      FxHashMap<Value, Vec<row_idx>> over the right side
3310                //      and probe with the left side. O(L + R) instead of
3311                //      O(L × R). Handles Inner and LeftOuter.
3312                //   2. Nested loop (E1.2) — fallback for Cross, non-equi
3313                //      predicates, or `on` expressions that reference
3314                //      either side with something more complex than a
3315                //      QualifiedField.
3316                let left_result = self.execute_plan(left)?;
3317                let right_result = self.execute_plan(right)?;
3318                let (left_columns, left_rows) = match left_result {
3319                    QueryResult::Rows { columns, rows } => (columns, rows),
3320                    _ => return Err("join left side must produce rows".into()),
3321                };
3322                let (right_columns, right_rows) = match right_result {
3323                    QueryResult::Rows { columns, rows } => (columns, rows),
3324                    _ => return Err("join right side must produce rows".into()),
3325                };
3326
3327                // Hash-join fast path.
3328                if !matches!(kind, JoinKind::Cross) {
3329                    if let Some(pred) = on {
3330                        if let Some((l_idx, r_idx)) =
3331                            try_extract_equi_join_keys(pred, &left_columns, &right_columns)
3332                        {
3333                            return Ok(hash_join(
3334                                left_columns,
3335                                left_rows,
3336                                right_columns,
3337                                right_rows,
3338                                l_idx,
3339                                r_idx,
3340                                *kind,
3341                            ));
3342                        }
3343                    }
3344                }
3345
3346                // Nested-loop fallback.
3347                let n_left = left_columns.len();
3348                let n_right = right_columns.len();
3349                let mut columns = Vec::with_capacity(n_left + n_right);
3350                columns.extend(left_columns);
3351                columns.extend(right_columns);
3352
3353                let mut rows: Vec<Vec<Value>> = Vec::with_capacity(left_rows.len());
3354                let mut combined: Vec<Value> = Vec::with_capacity(n_left + n_right);
3355
3356                for left_row in &left_rows {
3357                    let mut matched = false;
3358                    for right_row in &right_rows {
3359                        combined.clear();
3360                        combined.extend_from_slice(left_row);
3361                        combined.extend_from_slice(right_row);
3362                        let keep = match kind {
3363                            JoinKind::Cross => true,
3364                            JoinKind::Inner | JoinKind::LeftOuter => match on {
3365                                Some(pred) => eval_predicate(pred, &combined, &columns),
3366                                // Missing `on` for non-cross joins is a
3367                                // parser error, but if it slips through we
3368                                // treat it as "match everything".
3369                                None => true,
3370                            },
3371                            // RightOuter is rewritten to LeftOuter by the
3372                            // planner, so we never see it here.
3373                            JoinKind::RightOuter => {
3374                                unreachable!("planner rewrites RightOuter to LeftOuter")
3375                            }
3376                        };
3377                        if keep {
3378                            rows.push(combined.clone());
3379                            matched = true;
3380                        }
3381                    }
3382                    if !matched && matches!(kind, JoinKind::LeftOuter) {
3383                        let mut row = Vec::with_capacity(n_left + n_right);
3384                        row.extend_from_slice(left_row);
3385                        row.resize(n_left + n_right, Value::Empty);
3386                        rows.push(row);
3387                    }
3388                }
3389
3390                Ok(QueryResult::Rows { columns, rows })
3391            }
3392
3393            PlanNode::Distinct { input } => {
3394                let result = self.execute_plan(input)?;
3395                match result {
3396                    QueryResult::Rows { columns, rows } => {
3397                        let mut seen = std::collections::HashSet::new();
3398                        let mut unique_rows = Vec::new();
3399                        for row in rows {
3400                            if seen.insert(row.clone()) {
3401                                unique_rows.push(row);
3402                            }
3403                        }
3404                        Ok(QueryResult::Rows {
3405                            columns,
3406                            rows: unique_rows,
3407                        })
3408                    }
3409                    other => Ok(other),
3410                }
3411            }
3412
3413            PlanNode::GroupBy {
3414                input,
3415                keys,
3416                aggregates,
3417                having,
3418            } => {
3419                let result = self.execute_plan(input)?;
3420                match result {
3421                    QueryResult::Rows { columns, rows } => {
3422                        // Resolve key column indices.
3423                        let key_indices: Vec<usize> = keys
3424                            .iter()
3425                            .map(|k| {
3426                                columns
3427                                    .iter()
3428                                    .position(|c| c == k)
3429                                    .ok_or_else(|| format!("group-by column '{k}' not found"))
3430                            })
3431                            .collect::<Result<Vec<_>, _>>()?;
3432
3433                        // Resolve aggregate field indices. count(*) uses
3434                        // sentinel usize::MAX — compute_group_aggregate
3435                        // treats it as "count all rows in the group".
3436                        let agg_field_indices: Vec<usize> = aggregates
3437                            .iter()
3438                            .map(|a| {
3439                                if a.field == "*" {
3440                                    Ok(usize::MAX)
3441                                } else {
3442                                    columns.iter().position(|c| c == &a.field).ok_or_else(|| {
3443                                        format!("aggregate column '{}' not found", a.field)
3444                                    })
3445                                }
3446                            })
3447                            .collect::<Result<Vec<_>, _>>()?;
3448
3449                        // Group rows by key values (preserving insertion order).
3450                        let mut group_map: rustc_hash::FxHashMap<Vec<Value>, usize> =
3451                            rustc_hash::FxHashMap::default();
3452                        let mut groups: Vec<(Vec<Value>, Vec<usize>)> = Vec::new();
3453                        for (ri, row) in rows.iter().enumerate() {
3454                            let key: Vec<Value> =
3455                                key_indices.iter().map(|&i| row[i].clone()).collect();
3456                            match group_map.get(&key) {
3457                                Some(&idx) => groups[idx].1.push(ri),
3458                                None => {
3459                                    let idx = groups.len();
3460                                    group_map.insert(key.clone(), idx);
3461                                    groups.push((key, vec![ri]));
3462                                }
3463                            }
3464                        }
3465
3466                        // Build output column names: keys ++ aggregate output names.
3467                        let mut out_columns: Vec<String> = keys.clone();
3468                        for agg in aggregates.iter() {
3469                            out_columns.push(agg.output_name.clone());
3470                        }
3471
3472                        // Compute aggregates per group.
3473                        let mut out_rows: Vec<Vec<Value>> = Vec::with_capacity(groups.len());
3474                        for (key_vals, row_indices) in &groups {
3475                            let mut row = key_vals.clone();
3476                            for (ai, agg) in aggregates.iter().enumerate() {
3477                                let col_idx = agg_field_indices[ai];
3478                                let val = compute_group_aggregate(
3479                                    agg.function,
3480                                    &rows,
3481                                    row_indices,
3482                                    col_idx,
3483                                );
3484                                row.push(val);
3485                            }
3486                            out_rows.push(row);
3487                        }
3488
3489                        // Apply HAVING filter.
3490                        if let Some(having_expr) = having {
3491                            out_rows.retain(|row| eval_predicate(having_expr, row, &out_columns));
3492                        }
3493
3494                        Ok(QueryResult::Rows {
3495                            columns: out_columns,
3496                            rows: out_rows,
3497                        })
3498                    }
3499                    _ => Err("group by requires row input".into()),
3500                }
3501            }
3502
3503            PlanNode::CreateTable { name, fields } => {
3504                let columns: Vec<ColumnDef> = fields
3505                    .iter()
3506                    .enumerate()
3507                    .map(|(i, (fname, tname, req))| ColumnDef {
3508                        name: fname.clone(),
3509                        type_id: type_name_to_id(tname),
3510                        required: *req,
3511                        position: i as u16,
3512                    })
3513                    .collect();
3514                let schema = Schema {
3515                    table_name: name.clone(),
3516                    columns,
3517                };
3518                self.catalog
3519                    .create_table(schema)
3520                    .map_err(|e| e.to_string())?;
3521                Ok(QueryResult::Created(name.clone()))
3522            }
3523
3524            PlanNode::AlterTable { table, action } => match action {
3525                AlterAction::AddColumn {
3526                    name,
3527                    type_name,
3528                    required,
3529                } => {
3530                    let position = self
3531                        .catalog
3532                        .schema(table)
3533                        .ok_or_else(|| format!("table '{table}' not found"))?
3534                        .columns
3535                        .len() as u16;
3536                    let col = ColumnDef {
3537                        name: name.clone(),
3538                        type_id: type_name_to_id(type_name),
3539                        required: *required,
3540                        position,
3541                    };
3542                    self.catalog
3543                        .alter_table_add_column(table, col)
3544                        .map_err(|e| e.to_string())?;
3545                    Ok(QueryResult::Executed {
3546                        message: format!("column '{name}' added to '{table}'"),
3547                    })
3548                }
3549                AlterAction::DropColumn { name } => {
3550                    self.catalog
3551                        .alter_table_drop_column(table, name)
3552                        .map_err(|e| e.to_string())?;
3553                    Ok(QueryResult::Executed {
3554                        message: format!("column '{name}' dropped from '{table}'"),
3555                    })
3556                }
3557                AlterAction::AddIndex { column } => {
3558                    self.catalog
3559                        .create_index(table, column)
3560                        .map_err(|e| e.to_string())?;
3561                    Ok(QueryResult::Executed {
3562                        message: format!("index on '{table}.{column}' created"),
3563                    })
3564                }
3565            },
3566
3567            PlanNode::DropTable { name } => {
3568                self.catalog.drop_table(name).map_err(|e| e.to_string())?;
3569                Ok(QueryResult::Executed {
3570                    message: format!("table '{name}' dropped"),
3571                })
3572            }
3573
3574            PlanNode::CreateView { name, query_text } => {
3575                self.create_view(name, query_text)?;
3576                Ok(QueryResult::Executed {
3577                    message: format!("materialized view '{name}' created"),
3578                })
3579            }
3580
3581            PlanNode::RefreshView { name } => {
3582                self.refresh_view(name)?;
3583                Ok(QueryResult::Executed {
3584                    message: format!("materialized view '{name}' refreshed"),
3585                })
3586            }
3587
3588            PlanNode::DropView { name } => {
3589                self.drop_view(name)?;
3590                Ok(QueryResult::Executed {
3591                    message: format!("materialized view '{name}' dropped"),
3592                })
3593            }
3594
3595            PlanNode::Window { input, windows } => {
3596                let result = self.execute_plan(input)?;
3597                execute_window(result, windows)
3598            }
3599
3600            PlanNode::Union { left, right, all } => {
3601                let left_result = self.execute_plan(left)?;
3602                let right_result = self.execute_plan(right)?;
3603                let (left_cols, left_rows) = match left_result {
3604                    QueryResult::Rows { columns, rows } => (columns, rows),
3605                    _ => return Err("UNION requires query results on left side".into()),
3606                };
3607                let (_, right_rows) = match right_result {
3608                    QueryResult::Rows { columns, rows } => (columns, rows),
3609                    _ => return Err("UNION requires query results on right side".into()),
3610                };
3611                let mut combined = left_rows;
3612                if *all {
3613                    // UNION ALL — just concatenate.
3614                    combined.extend(right_rows);
3615                } else {
3616                    // UNION — deduplicate using the same HashSet approach
3617                    // as DISTINCT. Value already implements Hash + Eq.
3618                    let mut seen = std::collections::HashSet::new();
3619                    for row in &combined {
3620                        seen.insert(row.clone());
3621                    }
3622                    for row in right_rows {
3623                        if seen.insert(row.clone()) {
3624                            combined.push(row);
3625                        }
3626                    }
3627                }
3628                Ok(QueryResult::Rows {
3629                    columns: left_cols,
3630                    rows: combined,
3631                })
3632            }
3633
3634            PlanNode::Explain { input } => {
3635                let text = format_plan_tree(input, 0);
3636                Ok(QueryResult::Rows {
3637                    columns: vec!["plan".to_string()],
3638                    rows: text
3639                        .lines()
3640                        .map(|line| vec![Value::Str(line.to_string())])
3641                        .collect(),
3642                })
3643            }
3644
3645            PlanNode::IndexScan { table, column, key } => {
3646                let key_value = literal_to_value(key)?;
3647                let tbl = self
3648                    .catalog
3649                    .get_table(table)
3650                    .ok_or_else(|| format!("table '{table}' not found"))?;
3651                let columns: Vec<String> =
3652                    tbl.schema.columns.iter().map(|c| c.name.clone()).collect();
3653
3654                // Fast path: the table has a B-tree on this column. A single
3655                // point lookup returns 0 or 1 rows — this is the whole reason
3656                // the planner bothers emitting IndexScan.
3657                //
3658                // Mission D7: use `lookup_int` on int-keyed indexes to skip
3659                // the Value enum dispatch in the inner binary search. The
3660                // generic `tbl.index_lookup` helper can't do this without
3661                // lying about the key type, so we inline the index+heap
3662                // touch here.
3663                if let Some(btree) = tbl.index(column) {
3664                    let hit = match &key_value {
3665                        Value::Int(k) => btree.lookup_int(*k),
3666                        other => btree.lookup(other),
3667                    };
3668                    let rows = match hit {
3669                        Some(rid) => match tbl.heap.get(rid) {
3670                            Some(data) => vec![decode_row(&tbl.schema, &data)],
3671                            None => Vec::new(),
3672                        },
3673                        None => Vec::new(),
3674                    };
3675                    return Ok(QueryResult::Rows { columns, rows });
3676                }
3677
3678                // Fallback: no index on this column. The planner emits IndexScan
3679                // eagerly (it has no visibility into which columns are indexed
3680                // at plan time), so here we must behave like SeqScan+Filter on
3681                // `.col = literal`: return *all* matching rows, not just the
3682                // first one. A non-indexed column isn't necessarily unique.
3683                // We compile the eq predicate once and stream without any
3684                // per-row decode for non-matching rows.
3685                let schema = &tbl.schema;
3686                let fast = FastLayout::new(schema);
3687                let synth_pred = Expr::BinaryOp(
3688                    Box::new(Expr::Field(column.clone())),
3689                    BinOp::Eq,
3690                    Box::new(key.clone()),
3691                );
3692                if let Some(compiled) = compile_predicate(&synth_pred, &columns, &fast, schema) {
3693                    // Mission F: skip the first 4 Vec doublings.
3694                    let mut rows: Vec<Vec<Value>> = Vec::with_capacity(64);
3695                    self.catalog
3696                        .for_each_row_raw(table, |_rid, data| {
3697                            if compiled(data) {
3698                                rows.push(decode_row(schema, data));
3699                            }
3700                        })
3701                        .map_err(|e| e.to_string())?;
3702                    return Ok(QueryResult::Rows { columns, rows });
3703                }
3704
3705                // Last resort: slow eq-check on materialised rows.
3706                let col_idx = schema
3707                    .column_index(column)
3708                    .ok_or_else(|| format!("column '{column}' not found"))?;
3709                let rows: Vec<Vec<Value>> = tbl
3710                    .scan()
3711                    .filter_map(|(_, row)| {
3712                        if row[col_idx] == key_value {
3713                            Some(row)
3714                        } else {
3715                            None
3716                        }
3717                    })
3718                    .collect();
3719                Ok(QueryResult::Rows { columns, rows })
3720            }
3721
3722            PlanNode::RangeScan {
3723                table,
3724                column,
3725                start,
3726                end,
3727            } => {
3728                let tbl = self
3729                    .catalog
3730                    .get_table(table)
3731                    .ok_or_else(|| format!("table '{table}' not found"))?;
3732                let columns: Vec<String> =
3733                    tbl.schema.columns.iter().map(|c| c.name.clone()).collect();
3734                let schema = &tbl.schema;
3735
3736                let start_val = match start {
3737                    Some((expr, _)) => Some(literal_to_value(expr)?),
3738                    None => None,
3739                };
3740                let end_val = match end {
3741                    Some((expr, _)) => Some(literal_to_value(expr)?),
3742                    None => None,
3743                };
3744                let start_inclusive = start.as_ref().map(|(_, inc)| *inc).unwrap_or(true);
3745                let end_inclusive = end.as_ref().map(|(_, inc)| *inc).unwrap_or(true);
3746
3747                if let Some(btree) = tbl.index(column) {
3748                    let hits: Vec<(Value, RowId)> = match (&start_val, &end_val) {
3749                        (Some(s), Some(e)) => btree.range(s, e).collect(),
3750                        (Some(s), None) => btree.range_from(s),
3751                        (None, Some(e)) => btree.range_to(e),
3752                        (None, None) => {
3753                            let rows: Vec<Vec<Value>> = tbl.scan().map(|(_, row)| row).collect();
3754                            return Ok(QueryResult::Rows { columns, rows });
3755                        }
3756                    };
3757                    let mut rows: Vec<Vec<Value>> = Vec::with_capacity(hits.len());
3758                    for (key, rid) in hits {
3759                        if !start_inclusive {
3760                            if let Some(ref s) = start_val {
3761                                if &key == s {
3762                                    continue;
3763                                }
3764                            }
3765                        }
3766                        if !end_inclusive {
3767                            if let Some(ref e) = end_val {
3768                                if &key == e {
3769                                    continue;
3770                                }
3771                            }
3772                        }
3773                        if let Some(data) = tbl.heap.get(rid) {
3774                            rows.push(decode_row(schema, &data));
3775                        }
3776                    }
3777                    return Ok(QueryResult::Rows { columns, rows });
3778                }
3779
3780                // Fallback: no index — synthesize range predicate and scan.
3781                let fast = FastLayout::new(schema);
3782                let synth = synthesize_range_predicate(column, start, end);
3783                if let Some(compiled) = compile_predicate(&synth, &columns, &fast, schema) {
3784                    let mut rows: Vec<Vec<Value>> = Vec::with_capacity(64);
3785                    self.catalog
3786                        .for_each_row_raw(table, |_rid, data| {
3787                            if compiled(data) {
3788                                rows.push(decode_row(schema, data));
3789                            }
3790                        })
3791                        .map_err(|e| e.to_string())?;
3792                    return Ok(QueryResult::Rows { columns, rows });
3793                }
3794
3795                let col_idx = schema
3796                    .column_index(column)
3797                    .ok_or_else(|| format!("column '{column}' not found"))?;
3798                let rows: Vec<Vec<Value>> = tbl
3799                    .scan()
3800                    .filter(|(_, row)| {
3801                        range_matches(
3802                            &row[col_idx],
3803                            &start_val,
3804                            start_inclusive,
3805                            &end_val,
3806                            end_inclusive,
3807                        )
3808                    })
3809                    .map(|(_, row)| row)
3810                    .collect();
3811                Ok(QueryResult::Rows { columns, rows })
3812            }
3813        }
3814    }
3815
3816    // ─── Materialized view operations ──────────────────────────────────────
3817
3818    /// Create a materialized view: execute the source query, store results
3819    /// in a new backing table, and register the view.
3820    fn create_view(&mut self, name: &str, query_text: &str) -> Result<(), String> {
3821        if self.view_registry.is_view(name) {
3822            return Err(format!("materialized view '{name}' already exists"));
3823        }
3824        // Execute the source query to get the result set.
3825        let result = self.execute_powql(query_text)?;
3826        let (columns, rows) = match result {
3827            QueryResult::Rows { columns, rows } => (columns, rows),
3828            _ => return Err("view source query must be a SELECT".into()),
3829        };
3830        // Derive a schema for the backing table from the query result columns.
3831        let schema = self.derive_view_schema(name, &columns, &rows);
3832        // Create the backing table and insert the result rows.
3833        self.catalog
3834            .create_table(schema)
3835            .map_err(|e| e.to_string())?;
3836        for row in &rows {
3837            self.catalog.insert(name, row).map_err(|e| e.to_string())?;
3838        }
3839        // Determine which base tables this view depends on by parsing the query.
3840        let depends_on = self.extract_view_deps(query_text);
3841        self.view_registry
3842            .register(ViewDef {
3843                name: name.to_string(),
3844                query: query_text.to_string(),
3845                depends_on,
3846                dirty: false,
3847            })
3848            .map_err(|e| e.to_string())?;
3849        Ok(())
3850    }
3851
3852    /// Refresh a materialized view: re-execute its source query and replace
3853    /// the backing table's contents.
3854    fn refresh_view(&mut self, name: &str) -> Result<(), String> {
3855        let def = self
3856            .view_registry
3857            .get(name)
3858            .ok_or_else(|| format!("materialized view '{name}' not found"))?;
3859        let query_text = def.query.clone();
3860        // Execute the source query.
3861        let result = self.execute_powql(&query_text)?;
3862        let (_columns, rows) = match result {
3863            QueryResult::Rows { columns, rows } => (columns, rows),
3864            _ => return Err("view source query must be a SELECT".into()),
3865        };
3866        // Clear old data and insert fresh results. Mission B2: logged
3867        // variant — view refreshes are a mutation and crash recovery
3868        // must see them.
3869        self.catalog
3870            .scan_delete_matching_logged(name, |_| true)
3871            .map_err(|e| e.to_string())?;
3872        for row in &rows {
3873            self.catalog.insert(name, row).map_err(|e| e.to_string())?;
3874        }
3875        self.view_registry.mark_clean(name);
3876        Ok(())
3877    }
3878
3879    /// Drop a materialized view: remove the backing table and unregister.
3880    fn drop_view(&mut self, name: &str) -> Result<(), String> {
3881        if !self.view_registry.is_view(name) {
3882            return Err(format!("materialized view '{name}' not found"));
3883        }
3884        self.view_registry
3885            .unregister(name)
3886            .map_err(|e| e.to_string())?;
3887        self.catalog.drop_table(name).map_err(|e| e.to_string())?;
3888        Ok(())
3889    }
3890
3891    /// Derive a storage `Schema` for a view's backing table from query
3892    /// result column names and the first row's types.
3893    fn derive_view_schema(&self, name: &str, columns: &[String], rows: &[Vec<Value>]) -> Schema {
3894        use powdb_storage::types::{ColumnDef, TypeId};
3895        let cols: Vec<ColumnDef> = columns
3896            .iter()
3897            .enumerate()
3898            .map(|(i, col_name)| {
3899                let type_id = rows
3900                    .first()
3901                    .and_then(|row| row.get(i))
3902                    .map(|v| v.type_id())
3903                    .unwrap_or(TypeId::Str);
3904                ColumnDef {
3905                    name: col_name.clone(),
3906                    type_id,
3907                    required: false,
3908                    position: i as u16,
3909                }
3910            })
3911            .collect();
3912        Schema {
3913            table_name: name.to_string(),
3914            columns: cols,
3915        }
3916    }
3917
3918    /// Extract base table dependencies from a view's source query by
3919    /// parsing it and collecting the source table name.
3920    fn extract_view_deps(&self, query_text: &str) -> Vec<String> {
3921        use crate::parser::parse;
3922        match parse(query_text) {
3923            Ok(Statement::Query(q)) => {
3924                let mut deps = vec![q.source.clone()];
3925                for j in &q.joins {
3926                    deps.push(j.source.clone());
3927                }
3928                deps
3929            }
3930            _ => Vec::new(),
3931        }
3932    }
3933
3934    // ─── Specialized fast paths ─────────────────────────────────────────────
3935    //
3936    // These methods are helpers for the `execute_plan` match arms above.
3937    // Each returns `Ok(Some(result))` when the fast path fires, `Ok(None)`
3938    // when the shape isn't supported (caller falls back to generic code).
3939
3940    /// Aggregate sum/avg/min/max over a single fixed-size i64 column, with
3941    /// an optional compiled filter predicate. Walks raw row bytes — zero
3942    /// per-row allocation. Uses i128 accumulator for sum/avg overflow safety.
3943    fn agg_single_col_fast(
3944        &self,
3945        table: &str,
3946        col: &str,
3947        function: AggFunc,
3948        predicate: Option<&Expr>,
3949    ) -> Result<Option<QueryResult>, String> {
3950        let schema = self
3951            .catalog
3952            .schema(table)
3953            .ok_or_else(|| format!("table '{table}' not found"))?
3954            .clone();
3955        let columns: Vec<String> = schema.columns.iter().map(|c| c.name.clone()).collect();
3956        let col_idx = match schema.column_index(col) {
3957            Some(i) => i,
3958            None => return Ok(None),
3959        };
3960        // Only fast-path fixed-size numeric columns (Int/Float) for
3961        // sum/avg/min/max/count. Mission D10: Float parity — prior version
3962        // bailed on Float columns, forcing them through the generic row-
3963        // decoding path that allocated a Vec<Value> per row and dispatched
3964        // on Value::cmp for every compare. f64 decode is structurally the
3965        // same as i64 (load 8 bytes, cast), so the fast path handles both.
3966        let col_type = schema.columns[col_idx].type_id;
3967        if col_type != TypeId::Int && col_type != TypeId::Float {
3968            return Ok(None);
3969        }
3970
3971        let fast = FastLayout::new(&schema);
3972        // Mission C Phase 20b: inline the numeric-column reader instead of
3973        // building a `Box<dyn Fn>`. Eliminates 100K vtable dispatches per
3974        // 100K-row agg scan — every reader call folds directly into the
3975        // hot loop below.
3976        let byte_offset = match fast.fixed_offsets[col_idx] {
3977            Some(o) => o,
3978            None => return Ok(None),
3979        };
3980        let bitmap_byte = col_idx / 8;
3981        let bitmap_bit = (col_idx % 8) as u32;
3982        let data_offset = 2 + fast.bitmap_size + byte_offset;
3983
3984        // Optional compiled filter.
3985        let compiled_pred: Option<CompiledPredicate> = match predicate {
3986            Some(pred) => match compile_predicate(pred, &columns, &fast, &schema) {
3987                Some(c) => Some(c),
3988                None => return Ok(None), // let generic path handle it
3989            },
3990            None => None,
3991        };
3992
3993        // Mission C Phase 20b: specialize the inner loop per aggregate
3994        // function. The previous version ran a `match function { ... }`
3995        // *inside* the closure, which kept LLVM from producing optimal
3996        // scalar code for each variant (agg_max regressed ~23% vs the
3997        // baseline Box<dyn Fn> version even though per-row vtable cost
3998        // should have been strictly lower). Pushing the match out of the
3999        // hot loop lets each specialized body fold cleanly into
4000        // `for_each_row_raw` and removes a captured `AggFunc` + match
4001        // dispatch per row.
4002        //
4003        // Mission D10: same specialisation applies to the Float branch.
4004        // For Min/Max we use `f64::total_cmp` so the result matches
4005        // `Value::Ord` — this is the same ordering ORDER BY and the
4006        // top-N sort fast path use, keeping semantics consistent across
4007        // read paths (NaN compares as greatest, -0.0 < +0.0 for
4008        // deterministic tie-breaking).
4009        //
4010        // Mission D11 Phase 1: each inner loop now splits on presence of
4011        // a predicate (`if let Some(pred) = &compiled_pred`) so the hot
4012        // body never re-tests `Option` per row, and reads column bytes
4013        // via `read_i64_unchecked` / `read_f64_unchecked` helpers that
4014        // drop two bounds checks per row (null bitmap byte + value
4015        // slice). Safety is carried by the `FastLayout` invariant that
4016        // `data_offset + 8 <= row_len` for any fixed-size column; see
4017        // the helper doc comments. Hot loops are macro-generated so the
4018        // with-pred / no-pred split can't drift between variants.
4019        let result = match col_type {
4020            TypeId::Int => match function {
4021                AggFunc::Sum | AggFunc::Avg => {
4022                    let mut sum_i128: i128 = 0;
4023                    let mut count: i64 = 0;
4024                    agg_int_loop!(
4025                        self,
4026                        table,
4027                        compiled_pred,
4028                        bitmap_byte,
4029                        bitmap_bit,
4030                        data_offset,
4031                        |v: i64| {
4032                            count += 1;
4033                            sum_i128 += v as i128;
4034                        }
4035                    );
4036                    if matches!(function, AggFunc::Sum) {
4037                        let clamped = sum_i128.clamp(i64::MIN as i128, i64::MAX as i128) as i64;
4038                        QueryResult::Scalar(Value::Int(clamped))
4039                    } else if count == 0 {
4040                        QueryResult::Scalar(Value::Empty)
4041                    } else {
4042                        let avg = (sum_i128 as f64) / (count as f64);
4043                        QueryResult::Scalar(Value::Float(avg))
4044                    }
4045                }
4046                AggFunc::Min => {
4047                    let mut min_v: Option<i64> = None;
4048                    agg_int_loop!(
4049                        self,
4050                        table,
4051                        compiled_pred,
4052                        bitmap_byte,
4053                        bitmap_bit,
4054                        data_offset,
4055                        |v: i64| {
4056                            min_v = Some(match min_v {
4057                                Some(m) => m.min(v),
4058                                None => v,
4059                            });
4060                        }
4061                    );
4062                    QueryResult::Scalar(min_v.map(Value::Int).unwrap_or(Value::Empty))
4063                }
4064                AggFunc::Max => {
4065                    let mut max_v: Option<i64> = None;
4066                    agg_int_loop!(
4067                        self,
4068                        table,
4069                        compiled_pred,
4070                        bitmap_byte,
4071                        bitmap_bit,
4072                        data_offset,
4073                        |v: i64| {
4074                            max_v = Some(match max_v {
4075                                Some(m) => m.max(v),
4076                                None => v,
4077                            });
4078                        }
4079                    );
4080                    QueryResult::Scalar(max_v.map(Value::Int).unwrap_or(Value::Empty))
4081                }
4082                AggFunc::Count => {
4083                    let mut count: i64 = 0;
4084                    agg_int_loop!(
4085                        self,
4086                        table,
4087                        compiled_pred,
4088                        bitmap_byte,
4089                        bitmap_bit,
4090                        data_offset,
4091                        |_v: i64| {
4092                            count += 1;
4093                        }
4094                    );
4095                    QueryResult::Scalar(Value::Int(count))
4096                }
4097                AggFunc::CountDistinct => {
4098                    let mut seen = rustc_hash::FxHashSet::default();
4099                    agg_int_loop!(
4100                        self,
4101                        table,
4102                        compiled_pred,
4103                        bitmap_byte,
4104                        bitmap_bit,
4105                        data_offset,
4106                        |v: i64| {
4107                            seen.insert(v);
4108                        }
4109                    );
4110                    QueryResult::Scalar(Value::Int(seen.len() as i64))
4111                }
4112            },
4113            TypeId::Float => match function {
4114                AggFunc::Sum => {
4115                    // Use a single f64 accumulator. Naive summation is
4116                    // sufficient for MVP parity; if precision becomes an
4117                    // issue on long scans we can upgrade to Kahan–Neumaier
4118                    // compensated sum (~2x scalar cost, zero error growth).
4119                    let mut sum: f64 = 0.0;
4120                    agg_float_loop!(
4121                        self,
4122                        table,
4123                        compiled_pred,
4124                        bitmap_byte,
4125                        bitmap_bit,
4126                        data_offset,
4127                        |v: f64| {
4128                            sum += v;
4129                        }
4130                    );
4131                    QueryResult::Scalar(Value::Float(sum))
4132                }
4133                AggFunc::Avg => {
4134                    let mut sum: f64 = 0.0;
4135                    let mut count: i64 = 0;
4136                    agg_float_loop!(
4137                        self,
4138                        table,
4139                        compiled_pred,
4140                        bitmap_byte,
4141                        bitmap_bit,
4142                        data_offset,
4143                        |v: f64| {
4144                            sum += v;
4145                            count += 1;
4146                        }
4147                    );
4148                    if count == 0 {
4149                        QueryResult::Scalar(Value::Empty)
4150                    } else {
4151                        QueryResult::Scalar(Value::Float(sum / count as f64))
4152                    }
4153                }
4154                AggFunc::Min => {
4155                    // `total_cmp` for deterministic NaN handling (matches
4156                    // Value::Ord). NaN compares greatest, so Min will
4157                    // correctly ignore it in favour of any finite value.
4158                    let mut min_v: Option<f64> = None;
4159                    agg_float_loop!(
4160                        self,
4161                        table,
4162                        compiled_pred,
4163                        bitmap_byte,
4164                        bitmap_bit,
4165                        data_offset,
4166                        |v: f64| {
4167                            min_v = Some(match min_v {
4168                                Some(m) => {
4169                                    if v.total_cmp(&m).is_lt() {
4170                                        v
4171                                    } else {
4172                                        m
4173                                    }
4174                                }
4175                                None => v,
4176                            });
4177                        }
4178                    );
4179                    QueryResult::Scalar(min_v.map(Value::Float).unwrap_or(Value::Empty))
4180                }
4181                AggFunc::Max => {
4182                    let mut max_v: Option<f64> = None;
4183                    agg_float_loop!(
4184                        self,
4185                        table,
4186                        compiled_pred,
4187                        bitmap_byte,
4188                        bitmap_bit,
4189                        data_offset,
4190                        |v: f64| {
4191                            max_v = Some(match max_v {
4192                                Some(m) => {
4193                                    if v.total_cmp(&m).is_gt() {
4194                                        v
4195                                    } else {
4196                                        m
4197                                    }
4198                                }
4199                                None => v,
4200                            });
4201                        }
4202                    );
4203                    QueryResult::Scalar(max_v.map(Value::Float).unwrap_or(Value::Empty))
4204                }
4205                AggFunc::Count => {
4206                    let mut count: i64 = 0;
4207                    agg_float_loop!(
4208                        self,
4209                        table,
4210                        compiled_pred,
4211                        bitmap_byte,
4212                        bitmap_bit,
4213                        data_offset,
4214                        |_v: f64| {
4215                            count += 1;
4216                        }
4217                    );
4218                    QueryResult::Scalar(Value::Int(count))
4219                }
4220                AggFunc::CountDistinct => {
4221                    // Hash on `f64::to_bits` — matches `Value::Hash`, so
4222                    // distinct NaN bit patterns count as distinct and
4223                    // -0.0/+0.0 count as distinct. Consistent with how
4224                    // Float values are hashed in every other DISTINCT /
4225                    // GROUP BY path.
4226                    let mut seen = rustc_hash::FxHashSet::default();
4227                    agg_float_loop!(
4228                        self,
4229                        table,
4230                        compiled_pred,
4231                        bitmap_byte,
4232                        bitmap_bit,
4233                        data_offset,
4234                        |v: f64| {
4235                            seen.insert(v.to_bits());
4236                        }
4237                    );
4238                    QueryResult::Scalar(Value::Int(seen.len() as i64))
4239                }
4240            },
4241            _ => unreachable!("type guard above restricts to Int/Float"),
4242        };
4243        Ok(Some(result))
4244    }
4245
4246    /// `Project(Limit(Filter(SeqScan)))` and `Project(Limit(SeqScan))`.
4247    /// Streams rows, decodes only projected columns, stops at the limit.
4248    fn project_filter_limit_fast(
4249        &self,
4250        table: &str,
4251        fields: &[ProjectField],
4252        limit: usize,
4253        predicate: Option<&Expr>,
4254    ) -> Result<Option<QueryResult>, String> {
4255        let schema = self
4256            .catalog
4257            .schema(table)
4258            .ok_or_else(|| format!("table '{table}' not found"))?
4259            .clone();
4260        let all_columns: Vec<String> = schema.columns.iter().map(|c| c.name.clone()).collect();
4261
4262        // Each projection field must be a simple `.field` reference for this
4263        // fast path. Aliased or computed fields fall through.
4264        let mut proj_indices: Vec<usize> = Vec::with_capacity(fields.len());
4265        let mut proj_columns: Vec<String> = Vec::with_capacity(fields.len());
4266        for f in fields {
4267            let name = match &f.expr {
4268                Expr::Field(n) => n.clone(),
4269                _ => return Ok(None),
4270            };
4271            let idx = match all_columns.iter().position(|c| c == &name) {
4272                Some(i) => i,
4273                None => return Ok(None),
4274            };
4275            proj_indices.push(idx);
4276            proj_columns.push(f.alias.clone().unwrap_or(name));
4277        }
4278
4279        let fast = FastLayout::new(&schema);
4280        let row_layout = RowLayout::new(&schema);
4281
4282        let compiled_pred: Option<CompiledPredicate> = match predicate {
4283            Some(pred) => match compile_predicate(pred, &all_columns, &fast, &schema) {
4284                Some(c) => Some(c),
4285                None => return Ok(None),
4286            },
4287            None => None,
4288        };
4289
4290        let mut out: Vec<Vec<Value>> = Vec::with_capacity(limit.min(1024));
4291        // Mission D2: use try_for_each_row_raw to actually stop iterating
4292        // once the limit is reached. The previous `done` flag only short-
4293        // circuited the closure body, so a `limit 100` over 100K rows still
4294        // walked all 100K slots — burning ~30x SQLite on scan_filter_project_top100.
4295        self.catalog
4296            .try_for_each_row_raw(table, |_rid, data| {
4297                use std::ops::ControlFlow;
4298                if let Some(ref pred) = compiled_pred {
4299                    if !pred(data) {
4300                        return ControlFlow::Continue(());
4301                    }
4302                }
4303                let row: Vec<Value> = proj_indices
4304                    .iter()
4305                    .map(|&ci| decode_column(&schema, &row_layout, data, ci))
4306                    .collect();
4307                out.push(row);
4308                if out.len() >= limit {
4309                    ControlFlow::Break(())
4310                } else {
4311                    ControlFlow::Continue(())
4312                }
4313            })
4314            .map_err(|e| e.to_string())?;
4315
4316        Ok(Some(QueryResult::Rows {
4317            columns: proj_columns,
4318            rows: out,
4319        }))
4320    }
4321
4322    /// `Project(Limit(Sort(Filter(SeqScan))))` and `Project(Limit(Sort(SeqScan)))`.
4323    /// Bounded top-N heap over the sort key. Only the sort key needs to be
4324    /// read per row; projected columns are decoded only for the final
4325    /// winning rows when the heap drains.
4326    fn project_filter_sort_limit_fast(
4327        &self,
4328        table: &str,
4329        fields: &[ProjectField],
4330        sort_field: &str,
4331        descending: bool,
4332        limit: usize,
4333        predicate: Option<&Expr>,
4334    ) -> Result<Option<QueryResult>, String> {
4335        if limit == 0 {
4336            // Degenerate case — empty result. Let the generic path handle it
4337            // for proper column naming.
4338            return Ok(None);
4339        }
4340        let schema = self
4341            .catalog
4342            .schema(table)
4343            .ok_or_else(|| format!("table '{table}' not found"))?
4344            .clone();
4345        let all_columns: Vec<String> = schema.columns.iter().map(|c| c.name.clone()).collect();
4346
4347        // Sort key must be a fixed-size numeric column (Int or Float).
4348        // Mission D10: extended from Int-only. Float sort keys use a
4349        // sortable-u64 transform (see `f64_to_sortable_u64`) so the heap
4350        // path stays keyed on `u64` and the whole branch shape is
4351        // identical to the Int case — no new heap types, no `total_cmp`
4352        // closures in the hot loop.
4353        let sort_idx = match schema.column_index(sort_field) {
4354            Some(i) => i,
4355            None => return Ok(None),
4356        };
4357        let sort_col_type = schema.columns[sort_idx].type_id;
4358        if sort_col_type != TypeId::Int && sort_col_type != TypeId::Float {
4359            return Ok(None);
4360        }
4361
4362        // Each projection field must be a simple `.field`.
4363        let mut proj_indices: Vec<usize> = Vec::with_capacity(fields.len());
4364        let mut proj_columns: Vec<String> = Vec::with_capacity(fields.len());
4365        for f in fields {
4366            let name = match &f.expr {
4367                Expr::Field(n) => n.clone(),
4368                _ => return Ok(None),
4369            };
4370            let idx = match all_columns.iter().position(|c| c == &name) {
4371                Some(i) => i,
4372                None => return Ok(None),
4373            };
4374            proj_indices.push(idx);
4375            proj_columns.push(f.alias.clone().unwrap_or(name));
4376        }
4377
4378        let fast = FastLayout::new(&schema);
4379        let row_layout = RowLayout::new(&schema);
4380        // Mission C Phase 20b: inline numeric-column reader (no Box<dyn Fn>).
4381        let sort_byte_offset = match fast.fixed_offsets[sort_idx] {
4382            Some(o) => o,
4383            None => return Ok(None),
4384        };
4385        let sort_bitmap_byte = sort_idx / 8;
4386        let sort_bitmap_bit = (sort_idx % 8) as u32;
4387        let sort_data_offset = 2 + fast.bitmap_size + sort_byte_offset;
4388
4389        let compiled_pred: Option<CompiledPredicate> = match predicate {
4390            Some(pred) => match compile_predicate(pred, &all_columns, &fast, &schema) {
4391                Some(c) => Some(c),
4392                None => return Ok(None),
4393            },
4394            None => None,
4395        };
4396
4397        // Bounded top-N heap. For `order .x desc limit N`, we want the N
4398        // largest values — use a min-heap so the smallest is at the top and
4399        // can be popped when a better candidate arrives. For ascending, use
4400        // a max-heap. We tie-break with a monotonic `seq` counter so the
4401        // result is deterministic and stable.
4402        //
4403        // To keep this simple we maintain two typed heaps and pick by
4404        // direction.
4405        let drained: Vec<Vec<u8>> = match sort_col_type {
4406            TypeId::Int => {
4407                let mut seq: u64 = 0;
4408                let mut heap_desc: BinaryHeap<Reverse<(i64, u64, Vec<u8>)>> =
4409                    BinaryHeap::with_capacity(limit);
4410                let mut heap_asc: BinaryHeap<(i64, u64, Vec<u8>)> =
4411                    BinaryHeap::with_capacity(limit);
4412
4413                self.catalog
4414                    .for_each_row_raw(table, |_rid, data| {
4415                        if let Some(ref pred) = compiled_pred {
4416                            if !pred(data) {
4417                                return;
4418                            }
4419                        }
4420                        // Inlined int-column reader: null check + i64 decode.
4421                        let is_null = (data[2 + sort_bitmap_byte] >> sort_bitmap_bit) & 1 == 1;
4422                        if is_null {
4423                            return;
4424                        }
4425                        let key = i64::from_le_bytes(
4426                            data[sort_data_offset..sort_data_offset + 8]
4427                                .try_into()
4428                                .unwrap(),
4429                        );
4430                        let id = seq;
4431                        seq += 1;
4432
4433                        if descending {
4434                            if heap_desc.len() < limit {
4435                                heap_desc.push(Reverse((key, id, data.to_vec())));
4436                            } else if let Some(Reverse((top_key, _, _))) = heap_desc.peek() {
4437                                if key > *top_key {
4438                                    heap_desc.pop();
4439                                    heap_desc.push(Reverse((key, id, data.to_vec())));
4440                                }
4441                            }
4442                        } else if heap_asc.len() < limit {
4443                            heap_asc.push((key, id, data.to_vec()));
4444                        } else if let Some((top_key, _, _)) = heap_asc.peek() {
4445                            if key < *top_key {
4446                                heap_asc.pop();
4447                                heap_asc.push((key, id, data.to_vec()));
4448                            }
4449                        }
4450                    })
4451                    .map_err(|e| e.to_string())?;
4452
4453                let mut drained: Vec<(i64, u64, Vec<u8>)> = if descending {
4454                    heap_desc.into_iter().map(|Reverse(t)| t).collect()
4455                } else {
4456                    heap_asc.into_iter().collect()
4457                };
4458                if descending {
4459                    drained.sort_unstable_by(|a, b| b.0.cmp(&a.0).then(a.1.cmp(&b.1)));
4460                } else {
4461                    drained.sort_unstable_by(|a, b| a.0.cmp(&b.0).then(a.1.cmp(&b.1)));
4462                }
4463                drained.into_iter().map(|(_, _, d)| d).collect()
4464            }
4465            TypeId::Float => {
4466                // Novel angle: rather than introducing a `TotalF64` newtype
4467                // with `Ord via total_cmp`, transform the f64 bit pattern
4468                // into a sortable `u64` so `BinaryHeap<u64>` orders exactly
4469                // like `f64::total_cmp` would. Classic trick: flip the sign
4470                // bit on positives, flip all bits on negatives. Result:
4471                // - NaN (sign=0) stays greatest, matching total_cmp
4472                // - -0.0 sorts before +0.0, matching total_cmp
4473                // - Hot loop is branch-cheap (one compare + one xor)
4474                let mut seq: u64 = 0;
4475                let mut heap_desc: BinaryHeap<Reverse<(u64, u64, Vec<u8>)>> =
4476                    BinaryHeap::with_capacity(limit);
4477                let mut heap_asc: BinaryHeap<(u64, u64, Vec<u8>)> =
4478                    BinaryHeap::with_capacity(limit);
4479
4480                self.catalog
4481                    .for_each_row_raw(table, |_rid, data| {
4482                        if let Some(ref pred) = compiled_pred {
4483                            if !pred(data) {
4484                                return;
4485                            }
4486                        }
4487                        let is_null = (data[2 + sort_bitmap_byte] >> sort_bitmap_bit) & 1 == 1;
4488                        if is_null {
4489                            return;
4490                        }
4491                        let bits = u64::from_le_bytes(
4492                            data[sort_data_offset..sort_data_offset + 8]
4493                                .try_into()
4494                                .unwrap(),
4495                        );
4496                        let key = f64_bits_to_sortable_u64(bits);
4497                        let id = seq;
4498                        seq += 1;
4499
4500                        if descending {
4501                            if heap_desc.len() < limit {
4502                                heap_desc.push(Reverse((key, id, data.to_vec())));
4503                            } else if let Some(Reverse((top_key, _, _))) = heap_desc.peek() {
4504                                if key > *top_key {
4505                                    heap_desc.pop();
4506                                    heap_desc.push(Reverse((key, id, data.to_vec())));
4507                                }
4508                            }
4509                        } else if heap_asc.len() < limit {
4510                            heap_asc.push((key, id, data.to_vec()));
4511                        } else if let Some((top_key, _, _)) = heap_asc.peek() {
4512                            if key < *top_key {
4513                                heap_asc.pop();
4514                                heap_asc.push((key, id, data.to_vec()));
4515                            }
4516                        }
4517                    })
4518                    .map_err(|e| e.to_string())?;
4519
4520                let mut drained: Vec<(u64, u64, Vec<u8>)> = if descending {
4521                    heap_desc.into_iter().map(|Reverse(t)| t).collect()
4522                } else {
4523                    heap_asc.into_iter().collect()
4524                };
4525                if descending {
4526                    drained.sort_unstable_by(|a, b| b.0.cmp(&a.0).then(a.1.cmp(&b.1)));
4527                } else {
4528                    drained.sort_unstable_by(|a, b| a.0.cmp(&b.0).then(a.1.cmp(&b.1)));
4529                }
4530                drained.into_iter().map(|(_, _, d)| d).collect()
4531            }
4532            _ => unreachable!("type guard above restricts to Int/Float"),
4533        };
4534
4535        let rows: Vec<Vec<Value>> = drained
4536            .into_iter()
4537            .map(|data| {
4538                proj_indices
4539                    .iter()
4540                    .map(|&ci| decode_column(&schema, &row_layout, &data, ci))
4541                    .collect()
4542            })
4543            .collect();
4544
4545        Ok(Some(QueryResult::Rows {
4546            columns: proj_columns,
4547            rows,
4548        }))
4549    }
4550
4551    /// Gather the RowIds that a mutation should operate on, without
4552    /// materialising the full row set. Handles the shapes the planner emits
4553    /// for update/delete: SeqScan, IndexScan, and Filter(SeqScan). Other
4554    /// shapes fall back to `generic_rid_match`.
4555    ///
4556    /// Perf sprint: try to fuse the predicate evaluation and in-place
4557    /// byte-level mutation into a single heap walk. Returns `Some(result)`
4558    /// if the fused path fired, `None` to fall through to the generic
4559    /// two-pass code.
4560    ///
4561    /// Covers two shapes:
4562    /// 1. Fixed-width non-null literal assignments on non-indexed columns
4563    ///    → byte-patch every matched row in place (row length unchanged).
4564    /// 2. Single var-col literal assignment on a non-indexed column
4565    ///    → `patch_var_column_in_place` on every matched row (may shrink);
4566    ///    rows that can't be patched in place are collected for fallback.
4567    fn try_fused_scan_update(
4568        &mut self,
4569        table: &str,
4570        predicate: &Expr,
4571        resolved: &[(usize, Value)],
4572        changed_cols: &[usize],
4573    ) -> Option<Result<QueryResult, String>> {
4574        // Build compiled predicate. Requires a schema borrow that must be
4575        // dropped before we call scan_patch_matching_logged.
4576        let compiled = {
4577            let schema = self.catalog.schema(table)?;
4578            let columns: Vec<String> = schema.columns.iter().map(|c| c.name.clone()).collect();
4579            let fast = FastLayout::new(schema);
4580            compile_predicate(predicate, &columns, &fast, schema)?
4581        };
4582
4583        // ── Path 1: fixed-width fast patch ──────────────────────────
4584        let fixed_patches: Option<Vec<FastPatch>> = {
4585            let tbl = self.catalog.get_table(table)?;
4586            let schema = &tbl.schema;
4587            let all_fixed_nonnull = resolved
4588                .iter()
4589                .all(|(idx, val)| is_fixed_size(schema.columns[*idx].type_id) && !val.is_empty());
4590            let no_indexed = !resolved.iter().any(|(idx, _)| tbl.has_indexed_col(*idx));
4591            if all_fixed_nonnull && no_indexed {
4592                let layout = RowLayout::new(schema);
4593                let bitmap_size = layout.bitmap_size();
4594                Some(
4595                    resolved
4596                        .iter()
4597                        .map(|(idx, val)| {
4598                            let fixed_off = layout
4599                                .fixed_offset(*idx)
4600                                .expect("is_fixed_size already checked");
4601                            let field_off = 2 + bitmap_size + fixed_off;
4602                            let bytes: FixedBytes = match val {
4603                                Value::Int(v) => FixedBytes::I64(v.to_le_bytes()),
4604                                Value::Float(v) => FixedBytes::F64(v.to_le_bytes()),
4605                                Value::Bool(v) => FixedBytes::Bool(if *v { 1 } else { 0 }),
4606                                Value::DateTime(v) => FixedBytes::I64(v.to_le_bytes()),
4607                                Value::Uuid(v) => FixedBytes::Uuid(*v),
4608                                _ => unreachable!("all_fixed_nonnull guard"),
4609                            };
4610                            FastPatch {
4611                                field_off,
4612                                bitmap_byte_off: 2 + idx / 8,
4613                                bit_mask: 1u8 << (idx % 8),
4614                                bytes,
4615                            }
4616                        })
4617                        .collect(),
4618                )
4619            } else {
4620                None
4621            }
4622        };
4623        if let Some(patches) = fixed_patches {
4624            let result = self
4625                .catalog
4626                .scan_patch_matching_logged(table, compiled, |row| {
4627                    for p in &patches {
4628                        row[p.bitmap_byte_off] &= !p.bit_mask;
4629                        let field_bytes = p.bytes.as_slice();
4630                        row[p.field_off..p.field_off + field_bytes.len()]
4631                            .copy_from_slice(field_bytes);
4632                    }
4633                    Some(row.len() as u16)
4634                })
4635                .map_err(|e| e.to_string());
4636            match result {
4637                Ok((count, _)) => {
4638                    self.view_registry.mark_dependents_dirty(table);
4639                    return Some(Ok(QueryResult::Modified(count)));
4640                }
4641                Err(e) => return Some(Err(e)),
4642            }
4643        }
4644
4645        // ── Path 2: single var-col shrink fast patch ────────────────
4646        let var_patch: Option<(usize, Option<Vec<u8>>)> = {
4647            let tbl = self.catalog.get_table(table)?;
4648            let schema = &tbl.schema;
4649            let is_single = resolved.len() == 1;
4650            let is_var = is_single && !is_fixed_size(schema.columns[resolved[0].0].type_id);
4651            let no_indexed = !resolved.iter().any(|(idx, _)| tbl.has_indexed_col(*idx));
4652            if is_single && is_var && no_indexed {
4653                let (idx, val) = &resolved[0];
4654                let bytes_opt = match val {
4655                    Value::Str(s) => Some(s.as_bytes().to_vec()),
4656                    Value::Bytes(b) => Some(b.clone()),
4657                    Value::Empty => None,
4658                    _ => return None, // type mismatch, fall through
4659                };
4660                Some((*idx, bytes_opt))
4661            } else {
4662                None
4663            }
4664        };
4665        if let Some((col_idx, ref new_bytes_opt)) = var_patch {
4666            // Build a fresh RowLayout before the mutable borrow.
4667            let layout = {
4668                let schema = self.catalog.schema(table)?;
4669                RowLayout::new(schema)
4670            };
4671            let new_bytes_ref: Option<&[u8]> = new_bytes_opt.as_deref();
4672            let result = self
4673                .catalog
4674                .scan_patch_matching_logged(table, compiled, |row| {
4675                    patch_var_column_in_place(row, &layout, col_idx, new_bytes_ref)
4676                })
4677                .map_err(|e| e.to_string());
4678            match result {
4679                Ok((mut count, fallback_rids)) => {
4680                    // Handle rows where in-place patch failed (new > old).
4681                    for rid in fallback_rids {
4682                        let mut row = match self.catalog.get(table, rid) {
4683                            Some(r) => r,
4684                            None => continue,
4685                        };
4686                        for (idx, val) in resolved.iter() {
4687                            row[*idx] = val.clone();
4688                        }
4689                        self.catalog
4690                            .update_hinted(table, rid, &row, Some(changed_cols))
4691                            .map_err(|e| e.to_string())
4692                            .ok();
4693                        count += 1;
4694                    }
4695                    self.view_registry.mark_dependents_dirty(table);
4696                    return Some(Ok(QueryResult::Modified(count)));
4697                }
4698                Err(e) => return Some(Err(e)),
4699            }
4700        }
4701
4702        None // no fused path applicable — fall through
4703    }
4704
4705    /// Mission C Phase 3: schema is looked up via `self.catalog.schema(table)`
4706    /// inside the branches that actually need it. Previously the caller had
4707    /// to clone the full Schema (6+ String allocs) before every mutation just
4708    /// so this function could borrow it — a cost the update/delete hot path
4709    /// did not need.
4710    fn collect_rids_for_mutation(
4711        &mut self,
4712        input: &PlanNode,
4713        table: &str,
4714    ) -> Result<Vec<RowId>, String> {
4715        match input {
4716            PlanNode::SeqScan { table: t } if t == table => {
4717                // "Update/delete everything" — rare but legal.
4718                let rids: Vec<RowId> = self
4719                    .catalog
4720                    .scan(table)
4721                    .map_err(|e| e.to_string())?
4722                    .map(|(rid, _)| rid)
4723                    .collect();
4724                Ok(rids)
4725            }
4726            PlanNode::IndexScan {
4727                table: t,
4728                column,
4729                key,
4730            } if t == table => {
4731                let key_value = literal_to_value(key)?;
4732
4733                // Indexed case: single lookup, 0 or 1 rows.
4734                // Mission D7: int-specialized fast path on int-keyed indexes
4735                // (primary keys, created_at, etc.) — the common case for
4736                // `update_by_pk` / `delete where id = ?`.
4737                //
4738                // Scope the `tbl` borrow so it's released before we fall
4739                // through to the scan-based paths below (which reborrow
4740                // `self.catalog`).
4741                {
4742                    let tbl = self
4743                        .catalog
4744                        .get_table(table)
4745                        .ok_or_else(|| format!("table '{table}' not found"))?;
4746                    if let Some(btree) = tbl.index(column) {
4747                        let hit = match &key_value {
4748                            Value::Int(k) => btree.lookup_int(*k),
4749                            other => btree.lookup(other),
4750                        };
4751                        return Ok(match hit {
4752                            Some(rid) => vec![rid],
4753                            None => Vec::new(),
4754                        });
4755                    }
4756                }
4757
4758                // No index: the planner folds `.col = literal` to IndexScan
4759                // regardless of whether the column is actually unique. When
4760                // there's no index we must behave like Filter(SeqScan) and
4761                // return *all* matching RIDs — not just the first one.
4762                let schema = self
4763                    .catalog
4764                    .schema(table)
4765                    .ok_or_else(|| format!("table '{table}' not found"))?;
4766                let columns: Vec<String> = schema.columns.iter().map(|c| c.name.clone()).collect();
4767                let fast = FastLayout::new(schema);
4768                let synth = Expr::BinaryOp(
4769                    Box::new(Expr::Field(column.clone())),
4770                    BinOp::Eq,
4771                    Box::new(key.clone()),
4772                );
4773                if let Some(compiled) = compile_predicate(&synth, &columns, &fast, schema) {
4774                    // Mission F: skip the first 4 Vec doublings.
4775                    let mut rids: Vec<RowId> = Vec::with_capacity(64);
4776                    self.catalog
4777                        .for_each_row_raw(table, |rid, data| {
4778                            if compiled(data) {
4779                                rids.push(rid);
4780                            }
4781                        })
4782                        .map_err(|e| e.to_string())?;
4783                    return Ok(rids);
4784                }
4785
4786                // Fallback: decode each row, compare values.
4787                let col_idx = schema
4788                    .column_index(column)
4789                    .ok_or_else(|| format!("column '{column}' not found"))?;
4790                let rids: Vec<RowId> = self
4791                    .catalog
4792                    .scan(table)
4793                    .map_err(|e| e.to_string())?
4794                    .filter_map(|(rid, row)| {
4795                        if row[col_idx] == key_value {
4796                            Some(rid)
4797                        } else {
4798                            None
4799                        }
4800                    })
4801                    .collect();
4802                Ok(rids)
4803            }
4804            PlanNode::Filter {
4805                input: inner,
4806                predicate,
4807            } => {
4808                if let PlanNode::SeqScan { table: t } = inner.as_ref() {
4809                    if t != table {
4810                        return self.generic_rid_match(input, table);
4811                    }
4812                    let schema = self
4813                        .catalog
4814                        .schema(table)
4815                        .ok_or_else(|| format!("table '{table}' not found"))?;
4816                    let columns: Vec<String> =
4817                        schema.columns.iter().map(|c| c.name.clone()).collect();
4818                    let fast = FastLayout::new(schema);
4819                    let row_layout = RowLayout::new(schema);
4820
4821                    // Try compiled predicate first.
4822                    if let Some(compiled) = compile_predicate(predicate, &columns, &fast, schema) {
4823                        // Mission F: skip the first 4 Vec doublings.
4824                        let mut rids: Vec<RowId> = Vec::with_capacity(64);
4825                        self.catalog
4826                            .for_each_row_raw(table, |rid, data| {
4827                                if compiled(data) {
4828                                    rids.push(rid);
4829                                }
4830                            })
4831                            .map_err(|e| e.to_string())?;
4832                        return Ok(rids);
4833                    }
4834
4835                    // Fallback: selective decode + eval.
4836                    let pred_cols = predicate_column_indices(predicate, &columns);
4837                    let mut rids: Vec<RowId> = Vec::with_capacity(64);
4838                    self.catalog
4839                        .for_each_row_raw(table, |rid, data| {
4840                            let pred_row = decode_selective(schema, &row_layout, data, &pred_cols);
4841                            if eval_predicate(predicate, &pred_row, &columns) {
4842                                rids.push(rid);
4843                            }
4844                        })
4845                        .map_err(|e| e.to_string())?;
4846                    return Ok(rids);
4847                }
4848                self.generic_rid_match(input, table)
4849            }
4850            _ => self.generic_rid_match(input, table),
4851        }
4852    }
4853
4854    /// Last-ditch generic match: execute the plan, collect matching rows,
4855    /// then find corresponding RowIds by value equality. This is the old
4856    /// O(N*M) code path; only used when the plan shape is something exotic.
4857    fn generic_rid_match(&mut self, input: &PlanNode, table: &str) -> Result<Vec<RowId>, String> {
4858        let result = self.execute_plan(input)?;
4859        let rows = match result {
4860            QueryResult::Rows { rows, .. } => rows,
4861            _ => return Err("mutation source must be rows".into()),
4862        };
4863        let matching: Vec<RowId> = self
4864            .catalog
4865            .scan(table)
4866            .map_err(|e| e.to_string())?
4867            .filter(|(_, row)| rows.iter().any(|r| r == row))
4868            .map(|(rid, _)| rid)
4869            .collect();
4870        Ok(matching)
4871    }
4872
    /// Borrow the engine's catalog for read-only access.
    pub fn catalog(&self) -> &Catalog {
        &self.catalog
    }
4876
    /// Borrow the engine's catalog mutably.
    pub fn catalog_mut(&mut self) -> &mut Catalog {
        &mut self.catalog
    }
4880}
4881
/// Mission C Phase 4: precomputed byte-patch for the in-place update fast
/// path. Built once per `Update` query (before the scan starts) and reused
/// on every matching row: applying it clears the column's null bit and
/// copies `bytes` over the column's fixed-width slot.
#[derive(Clone, Copy)]
struct FastPatch {
    /// Byte offset of the fixed column within the row encoding:
    /// `2 + bitmap_size + layout.fixed_offset(col)`.
    field_off: usize,
    /// Byte offset of the bitmap byte containing this column's null bit
    /// (`2 + col/8`). We read-modify-write this byte to force the column
    /// non-null, so the idempotent clear is safe for already-non-null rows.
    bitmap_byte_off: usize,
    /// Bit mask for this column's null bit within `bitmap_byte_off`
    /// (`1 << (col % 8)`).
    bit_mask: u8,
    /// The new fixed-width value encoded as little-endian bytes.
    bytes: FixedBytes,
}
4899
/// Inline, already-encoded payload of a fixed-width column value. Keeping
/// the bytes in fixed-size arrays (no heap allocation) lets [`FastPatch`]
/// stay `Copy`.
#[derive(Clone, Copy)]
enum FixedBytes {
    /// Eight little-endian bytes of an `i64` (ints and datetimes).
    I64([u8; 8]),
    /// Eight little-endian bytes of an `f64`.
    F64([u8; 8]),
    /// A boolean stored as a single `0`/`1` byte.
    Bool(u8),
    /// The sixteen raw bytes of a UUID.
    Uuid([u8; 16]),
}

impl FixedBytes {
    /// View the payload as the exact byte run that gets copied into a row.
    #[inline]
    fn as_slice(&self) -> &[u8] {
        match self {
            Self::I64(raw) | Self::F64(raw) => &raw[..],
            Self::Bool(flag) => std::slice::from_ref(flag),
            Self::Uuid(raw) => &raw[..],
        }
    }
}
4919
4920fn type_name_to_id(name: &str) -> TypeId {
4921    match name {
4922        "str" => TypeId::Str,
4923        "int" => TypeId::Int,
4924        "float" => TypeId::Float,
4925        "bool" => TypeId::Bool,
4926        "datetime" => TypeId::DateTime,
4927        "uuid" => TypeId::Uuid,
4928        "bytes" => TypeId::Bytes,
4929        _ => TypeId::Str,
4930    }
4931}
4932
4933/// Convert a runtime `Value` back into an `Expr::Literal` for InSubquery
4934/// materialization. Non-literal-representable values become `Literal::Int(0)`
4935/// (shouldn't happen in practice — subqueries return primitive columns).
4936/// Check if an expression tree contains any `InSubquery` nodes.
4937/// Collect all `Expr::Field` names referenced by an expression tree.
4938fn collect_field_refs(expr: &Expr, out: &mut Vec<String>) {
4939    match expr {
4940        Expr::Field(name) => out.push(name.clone()),
4941        Expr::QualifiedField { qualifier, field } => {
4942            out.push(format!("{qualifier}.{field}"));
4943        }
4944        Expr::BinaryOp(l, _, r) => {
4945            collect_field_refs(l, out);
4946            collect_field_refs(r, out);
4947        }
4948        Expr::UnaryOp(_, inner) => collect_field_refs(inner, out),
4949        Expr::FunctionCall(_, inner) => collect_field_refs(inner, out),
4950        Expr::Coalesce(l, r) => {
4951            collect_field_refs(l, out);
4952            collect_field_refs(r, out);
4953        }
4954        Expr::InList { expr, list, .. } => {
4955            collect_field_refs(expr, out);
4956            for item in list {
4957                collect_field_refs(item, out);
4958            }
4959        }
4960        Expr::ScalarFunc(_, args) => {
4961            for a in args {
4962                collect_field_refs(a, out);
4963            }
4964        }
4965        Expr::Cast(inner, _) => {
4966            collect_field_refs(inner, out);
4967        }
4968        Expr::Case { whens, else_expr } => {
4969            for (c, r) in whens {
4970                collect_field_refs(c, out);
4971                collect_field_refs(r, out);
4972            }
4973            if let Some(e) = else_expr {
4974                collect_field_refs(e, out);
4975            }
4976        }
4977        _ => {}
4978    }
4979}
4980
4981/// Detect whether a subquery is correlated: any `Expr::Field` reference in
4982/// the subquery's filter that doesn't match a column in the subquery's
4983/// source table indicates a reference to an outer scope.
4984/// Replace outer-scope field references in a correlated subquery's filter
4985/// with literal values from the current outer row. Fields that belong to
4986/// the subquery's own source table are left unchanged.
4987fn substitute_outer_refs(
4988    expr: &Expr,
4989    subquery_source: &str,
4990    catalog: &Catalog,
4991    outer_row: &[Value],
4992    outer_columns: &[String],
4993) -> Expr {
4994    let sub_cols: Vec<String> = catalog
4995        .schema(subquery_source)
4996        .map(|s| s.columns.iter().map(|c| c.name.clone()).collect())
4997        .unwrap_or_default();
4998    substitute_outer_refs_inner(expr, &sub_cols, outer_row, outer_columns)
4999}
5000
5001fn substitute_outer_refs_inner(
5002    expr: &Expr,
5003    sub_cols: &[String],
5004    outer_row: &[Value],
5005    outer_columns: &[String],
5006) -> Expr {
5007    match expr {
5008        Expr::Field(name) => {
5009            if sub_cols.iter().any(|c| c == name) {
5010                expr.clone()
5011            } else if let Some(i) = outer_columns.iter().position(|c| c == name) {
5012                value_to_expr(outer_row[i].clone())
5013            } else {
5014                expr.clone()
5015            }
5016        }
5017        Expr::BinaryOp(l, op, r) => {
5018            let l = substitute_outer_refs_inner(l, sub_cols, outer_row, outer_columns);
5019            let r = substitute_outer_refs_inner(r, sub_cols, outer_row, outer_columns);
5020            Expr::BinaryOp(Box::new(l), *op, Box::new(r))
5021        }
5022        Expr::UnaryOp(op, inner) => {
5023            let inner = substitute_outer_refs_inner(inner, sub_cols, outer_row, outer_columns);
5024            Expr::UnaryOp(*op, Box::new(inner))
5025        }
5026        Expr::InList {
5027            expr: e,
5028            list,
5029            negated,
5030        } => {
5031            let e = substitute_outer_refs_inner(e, sub_cols, outer_row, outer_columns);
5032            let list = list
5033                .iter()
5034                .map(|item| substitute_outer_refs_inner(item, sub_cols, outer_row, outer_columns))
5035                .collect();
5036            Expr::InList {
5037                expr: Box::new(e),
5038                list,
5039                negated: *negated,
5040            }
5041        }
5042        Expr::Coalesce(l, r) => {
5043            let l = substitute_outer_refs_inner(l, sub_cols, outer_row, outer_columns);
5044            let r = substitute_outer_refs_inner(r, sub_cols, outer_row, outer_columns);
5045            Expr::Coalesce(Box::new(l), Box::new(r))
5046        }
5047        other => other.clone(),
5048    }
5049}
5050
5051fn is_correlated_subquery(subquery: &QueryExpr, catalog: &Catalog) -> bool {
5052    let filter = match &subquery.filter {
5053        Some(f) => f,
5054        None => return false,
5055    };
5056    let schema = match catalog.schema(&subquery.source) {
5057        Some(s) => s,
5058        None => return false, // table not found — not correlation, just an error
5059    };
5060    let table_cols: Vec<String> = schema.columns.iter().map(|c| c.name.clone()).collect();
5061    let mut refs = Vec::new();
5062    collect_field_refs(filter, &mut refs);
5063    // If any referenced field doesn't exist in the subquery's source table,
5064    // it's (probably) a reference to an outer scope — i.e., correlated.
5065    refs.iter().any(|r| {
5066        // Skip qualified references (alias.field) — they unambiguously
5067        // target a specific source and will only match the subquery's own
5068        // source if they share the alias.
5069        if r.contains('.') {
5070            let alias = subquery.alias.as_deref().unwrap_or(&subquery.source);
5071            !r.starts_with(alias)
5072        } else {
5073            !table_cols.iter().any(|c| c == r)
5074        }
5075    })
5076}
5077
5078fn contains_subquery(expr: &Expr) -> bool {
5079    match expr {
5080        Expr::InSubquery { .. } => true,
5081        Expr::ExistsSubquery { .. } => true,
5082        Expr::BinaryOp(l, _, r) => contains_subquery(l) || contains_subquery(r),
5083        Expr::UnaryOp(_, inner) => contains_subquery(inner),
5084        Expr::InList { expr, list, .. } => {
5085            contains_subquery(expr) || list.iter().any(contains_subquery)
5086        }
5087        Expr::Case { whens, else_expr } => {
5088            whens
5089                .iter()
5090                .any(|(c, r)| contains_subquery(c) || contains_subquery(r))
5091                || else_expr.as_ref().is_some_and(|e| contains_subquery(e))
5092        }
5093        Expr::ScalarFunc(_, args) => args.iter().any(contains_subquery),
5094        Expr::Cast(inner, _) => contains_subquery(inner),
5095        Expr::FunctionCall(_, inner) => contains_subquery(inner),
5096        Expr::Coalesce(l, r) => contains_subquery(l) || contains_subquery(r),
5097        _ => false,
5098    }
5099}
5100
5101fn value_to_expr(val: Value) -> Expr {
5102    match val {
5103        Value::Int(v) => Expr::Literal(Literal::Int(v)),
5104        Value::Float(v) => Expr::Literal(Literal::Float(v)),
5105        Value::Str(v) => Expr::Literal(Literal::String(v)),
5106        Value::Bool(v) => Expr::Literal(Literal::Bool(v)),
5107        _ => Expr::Literal(Literal::Int(0)),
5108    }
5109}
5110
5111fn literal_to_value(expr: &Expr) -> Result<Value, String> {
5112    match expr {
5113        Expr::Literal(Literal::Int(v)) => Ok(Value::Int(*v)),
5114        Expr::Literal(Literal::Float(v)) => Ok(Value::Float(*v)),
5115        Expr::Literal(Literal::String(v)) => Ok(Value::Str(v.clone())),
5116        Expr::Literal(Literal::Bool(v)) => Ok(Value::Bool(*v)),
5117        _ => Err("expected literal value".into()),
5118    }
5119}
5120
5121/// Mission C Phase 5: direct Literal→Value conversion used by the
5122/// prepared-statement Insert fast path. Skips the `Expr::Literal` unwrap
5123/// and the `Result` plumbing of [`literal_to_value`]. String literals
5124/// still clone because the row needs an owned `Value::Str`.
5125#[inline]
5126fn literal_value_from(lit: &Literal) -> Value {
5127    match lit {
5128        Literal::Int(v) => Value::Int(*v),
5129        Literal::Float(v) => Value::Float(*v),
5130        Literal::String(v) => Value::Str(v.clone()),
5131        Literal::Bool(v) => Value::Bool(*v),
5132    }
5133}
5134
5135/// Mission C Phase 13: moving companion to [`literal_value_from`] used
5136/// by [`Engine::execute_prepared_take`]. Pulls the `String` out of a
5137/// `Literal::String` via `mem::take`, leaving an empty string behind
5138/// so the caller's slice remains valid (but with blanked-out strings).
5139/// On the insert fast path this removes one heap alloc per string
5140/// column per row.
5141#[inline]
5142fn literal_value_take(lit: &mut Literal) -> Value {
5143    match lit {
5144        Literal::Int(v) => Value::Int(*v),
5145        Literal::Float(v) => Value::Float(*v),
5146        Literal::String(v) => Value::Str(std::mem::take(v)),
5147        Literal::Bool(v) => Value::Bool(*v),
5148    }
5149}
5150
5151fn eval_expr(expr: &Expr, row: &[Value], columns: &[String]) -> Value {
5152    match expr {
5153        Expr::Field(name) => columns
5154            .iter()
5155            .position(|c| c == name)
5156            .map(|i| row[i].clone())
5157            .unwrap_or(Value::Empty),
5158        Expr::QualifiedField { qualifier, field } => {
5159            // Mission E1.2: join queries emit columns named `alias.field`,
5160            // so the lookup is a direct prefix+tail match. We compare in
5161            // pieces to avoid allocating a fresh `format!("{q}.{f}")` on
5162            // every row — the join loop can evaluate this tens of thousands
5163            // of times per query.
5164            let q = qualifier.as_bytes();
5165            let f = field.as_bytes();
5166            let idx = columns.iter().position(|c| {
5167                let b = c.as_bytes();
5168                b.len() == q.len() + 1 + f.len()
5169                    && b[..q.len()] == *q
5170                    && b[q.len()] == b'.'
5171                    && b[q.len() + 1..] == *f
5172            });
5173            idx.map(|i| row[i].clone()).unwrap_or(Value::Empty)
5174        }
5175        Expr::Literal(lit) => match lit {
5176            Literal::Int(v) => Value::Int(*v),
5177            Literal::Float(v) => Value::Float(*v),
5178            Literal::String(v) => Value::Str(v.clone()),
5179            Literal::Bool(v) => Value::Bool(*v),
5180        },
5181        Expr::BinaryOp(left, op, right) => {
5182            let l = eval_expr(left, row, columns);
5183            let r = eval_expr(right, row, columns);
5184            eval_binop(&l, *op, &r)
5185        }
5186        Expr::Coalesce(left, right) => {
5187            let l = eval_expr(left, row, columns);
5188            if l.is_empty() {
5189                eval_expr(right, row, columns)
5190            } else {
5191                l
5192            }
5193        }
5194        Expr::InList {
5195            expr,
5196            list,
5197            negated,
5198        } => {
5199            let val = eval_expr(expr, row, columns);
5200            let found = list.iter().any(|item| {
5201                let iv = eval_expr(item, row, columns);
5202                val == iv
5203            });
5204            Value::Bool(if *negated { !found } else { found })
5205        }
5206        Expr::InSubquery { .. } => {
5207            // Should have been materialized into InList before eval_expr.
5208            Value::Empty
5209        }
5210        Expr::ExistsSubquery { .. } => {
5211            // Should have been materialized into a Bool literal before
5212            // eval_expr (see materialize_subqueries).
5213            Value::Empty
5214        }
5215        Expr::UnaryOp(op, inner) => {
5216            let v = eval_expr(inner, row, columns);
5217            match op {
5218                UnaryOp::Not => match v {
5219                    Value::Bool(b) => Value::Bool(!b),
5220                    _ => Value::Empty,
5221                },
5222                UnaryOp::Exists => Value::Bool(!v.is_empty()),
5223                UnaryOp::NotExists => Value::Bool(v.is_empty()),
5224                UnaryOp::IsNull => Value::Bool(v.is_empty()),
5225                UnaryOp::IsNotNull => Value::Bool(!v.is_empty()),
5226            }
5227        }
5228        Expr::ScalarFunc(func, args) => {
5229            let vals: Vec<Value> = args.iter().map(|a| eval_expr(a, row, columns)).collect();
5230            eval_scalar_func(*func, &vals)
5231        }
5232        Expr::Case { whens, else_expr } => {
5233            for (condition, result) in whens {
5234                if eval_predicate(condition, row, columns) {
5235                    return eval_expr(result, row, columns);
5236                }
5237            }
5238            match else_expr {
5239                Some(e) => eval_expr(e, row, columns),
5240                None => Value::Empty,
5241            }
5242        }
5243        Expr::Cast(inner, cast_type) => {
5244            let val = eval_expr(inner, row, columns);
5245            eval_cast(val, *cast_type)
5246        }
5247        Expr::FunctionCall(_, _) | Expr::Param(_) | Expr::Window { .. } => Value::Empty,
5248    }
5249}
5250
5251fn eval_predicate(expr: &Expr, row: &[Value], columns: &[String]) -> bool {
5252    match eval_expr(expr, row, columns) {
5253        Value::Bool(b) => b,
5254        _ => false,
5255    }
5256}
5257
5258fn eval_scalar_func(func: ScalarFn, args: &[Value]) -> Value {
5259    match func {
5260        ScalarFn::Upper => match args.first() {
5261            Some(Value::Str(s)) => Value::Str(s.to_uppercase()),
5262            _ => Value::Empty,
5263        },
5264        ScalarFn::Lower => match args.first() {
5265            Some(Value::Str(s)) => Value::Str(s.to_lowercase()),
5266            _ => Value::Empty,
5267        },
5268        ScalarFn::Length => match args.first() {
5269            Some(Value::Str(s)) => Value::Int(s.len() as i64),
5270            _ => Value::Empty,
5271        },
5272        ScalarFn::Trim => match args.first() {
5273            Some(Value::Str(s)) => Value::Str(s.trim().to_string()),
5274            _ => Value::Empty,
5275        },
5276        ScalarFn::Substring => {
5277            if args.len() < 3 {
5278                return Value::Empty;
5279            }
5280            match (&args[0], &args[1], &args[2]) {
5281                (Value::Str(s), Value::Int(start), Value::Int(len)) => {
5282                    let start = (*start as usize).saturating_sub(1); // 1-indexed
5283                    let len = *len as usize;
5284                    let sub: String = s.chars().skip(start).take(len).collect();
5285                    Value::Str(sub)
5286                }
5287                _ => Value::Empty,
5288            }
5289        }
5290        ScalarFn::Concat => {
5291            let mut result = String::new();
5292            for v in args {
5293                match v {
5294                    Value::Str(s) => result.push_str(s),
5295                    Value::Int(n) => result.push_str(&n.to_string()),
5296                    Value::Float(f) => result.push_str(&f.to_string()),
5297                    Value::Bool(b) => result.push_str(if *b { "true" } else { "false" }),
5298                    _ => {}
5299                }
5300            }
5301            Value::Str(result)
5302        }
5303        // Math functions
5304        ScalarFn::Abs => match args.first() {
5305            Some(Value::Int(n)) => Value::Int(n.abs()),
5306            Some(Value::Float(f)) => Value::Float(f.abs()),
5307            _ => Value::Empty,
5308        },
5309        ScalarFn::Round => {
5310            let decimals = match args.get(1) {
5311                Some(Value::Int(d)) => *d as i32,
5312                _ => 0,
5313            };
5314            match args.first() {
5315                Some(Value::Float(f)) => {
5316                    let factor = 10_f64.powi(decimals);
5317                    Value::Float((f * factor).round() / factor)
5318                }
5319                Some(Value::Int(n)) => Value::Int(*n),
5320                _ => Value::Empty,
5321            }
5322        }
5323        ScalarFn::Ceil => match args.first() {
5324            Some(Value::Float(f)) => Value::Float(f.ceil()),
5325            Some(Value::Int(n)) => Value::Int(*n),
5326            _ => Value::Empty,
5327        },
5328        ScalarFn::Floor => match args.first() {
5329            Some(Value::Float(f)) => Value::Float(f.floor()),
5330            Some(Value::Int(n)) => Value::Int(*n),
5331            _ => Value::Empty,
5332        },
5333        ScalarFn::Sqrt => match args.first() {
5334            Some(Value::Float(f)) if *f >= 0.0 => Value::Float(f.sqrt()),
5335            Some(Value::Int(n)) if *n >= 0 => Value::Float((*n as f64).sqrt()),
5336            _ => Value::Empty,
5337        },
5338        ScalarFn::Pow => match (args.first(), args.get(1)) {
5339            (Some(Value::Float(base)), Some(Value::Float(exp))) => Value::Float(base.powf(*exp)),
5340            (Some(Value::Float(base)), Some(Value::Int(exp))) => {
5341                Value::Float(base.powi(*exp as i32))
5342            }
5343            (Some(Value::Int(base)), Some(Value::Int(exp))) => {
5344                if *exp >= 0 && *exp <= u32::MAX as i64 {
5345                    match base.checked_pow(*exp as u32) {
5346                        Some(v) => Value::Int(v),
5347                        None => Value::Float((*base as f64).powi(*exp as i32)),
5348                    }
5349                } else {
5350                    Value::Float((*base as f64).powi(*exp as i32))
5351                }
5352            }
5353            (Some(Value::Int(base)), Some(Value::Float(exp))) => {
5354                Value::Float((*base as f64).powf(*exp))
5355            }
5356            _ => Value::Empty,
5357        },
5358        // Date/time functions
5359        ScalarFn::Now => {
5360            use std::time::{SystemTime, UNIX_EPOCH};
5361            let micros = SystemTime::now()
5362                .duration_since(UNIX_EPOCH)
5363                .unwrap_or_default()
5364                .as_micros() as i64;
5365            Value::DateTime(micros)
5366        }
5367        ScalarFn::Extract => {
5368            // extract("part", datetime_expr)
5369            let part = match args.first() {
5370                Some(Value::Str(s)) => s.as_str(),
5371                _ => return Value::Empty,
5372            };
5373            let micros = match args.get(1) {
5374                Some(Value::DateTime(m)) => *m,
5375                Some(Value::Int(m)) => *m, // treat raw int as micros
5376                _ => return Value::Empty,
5377            };
5378            datetime_extract(part, micros)
5379        }
5380        ScalarFn::DateAdd => {
5381            // date_add(datetime_expr, amount, "unit")
5382            let micros = match args.first() {
5383                Some(Value::DateTime(m)) => *m,
5384                Some(Value::Int(m)) => *m,
5385                _ => return Value::Empty,
5386            };
5387            let amount = match args.get(1) {
5388                Some(Value::Int(n)) => *n,
5389                _ => return Value::Empty,
5390            };
5391            let unit = match args.get(2) {
5392                Some(Value::Str(s)) => s.as_str(),
5393                _ => return Value::Empty,
5394            };
5395            let delta_micros = match unit {
5396                "microsecond" | "microseconds" | "us" => amount,
5397                "millisecond" | "milliseconds" | "ms" => amount * 1_000,
5398                "second" | "seconds" | "s" => amount * 1_000_000,
5399                "minute" | "minutes" | "m" => amount * 60_000_000,
5400                "hour" | "hours" | "h" => amount * 3_600_000_000,
5401                "day" | "days" | "d" => amount * 86_400_000_000,
5402                _ => return Value::Empty,
5403            };
5404            Value::DateTime(micros + delta_micros)
5405        }
5406        ScalarFn::DateDiff => {
5407            // date_diff(dt1, dt2, "unit")
5408            let m1 = match args.first() {
5409                Some(Value::DateTime(m)) => *m,
5410                Some(Value::Int(m)) => *m,
5411                _ => return Value::Empty,
5412            };
5413            let m2 = match args.get(1) {
5414                Some(Value::DateTime(m)) => *m,
5415                Some(Value::Int(m)) => *m,
5416                _ => return Value::Empty,
5417            };
5418            let unit = match args.get(2) {
5419                Some(Value::Str(s)) => s.as_str(),
5420                _ => return Value::Empty,
5421            };
5422            let diff = m1 - m2;
5423            let result = match unit {
5424                "microsecond" | "microseconds" | "us" => diff,
5425                "millisecond" | "milliseconds" | "ms" => diff / 1_000,
5426                "second" | "seconds" | "s" => diff / 1_000_000,
5427                "minute" | "minutes" | "m" => diff / 60_000_000,
5428                "hour" | "hours" | "h" => diff / 3_600_000_000,
5429                "day" | "days" | "d" => diff / 86_400_000_000,
5430                _ => return Value::Empty,
5431            };
5432            Value::Int(result)
5433        }
5434    }
5435}
5436
5437/// Extract a component from a DateTime value (microseconds since epoch).
5438fn datetime_extract(part: &str, micros: i64) -> Value {
5439    // Convert micros to seconds + remainder for calendar calculations
5440    let total_secs = micros / 1_000_000;
5441    let micro_rem = micros % 1_000_000;
5442
5443    // Simple civil calendar from Unix timestamp (no TZ — UTC assumed)
5444    let days_since_epoch = if total_secs >= 0 {
5445        total_secs / 86400
5446    } else {
5447        (total_secs - 86399) / 86400
5448    };
5449    let secs_of_day = total_secs - days_since_epoch * 86400;
5450
5451    match part {
5452        "hour" => Value::Int(secs_of_day / 3600),
5453        "minute" => Value::Int((secs_of_day % 3600) / 60),
5454        "second" => Value::Int(secs_of_day % 60),
5455        "millisecond" => Value::Int(micro_rem / 1000),
5456        "microsecond" => Value::Int(micro_rem),
5457        "epoch" => Value::Int(total_secs),
5458        "year" | "month" | "day" => {
5459            // Civil date from days since 1970-01-01 (algorithm from Howard Hinnant)
5460            let z = days_since_epoch + 719468;
5461            let era = if z >= 0 { z } else { z - 146096 } / 146097;
5462            let doe = (z - era * 146097) as u32;
5463            let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365;
5464            let y = (yoe as i64) + era * 400;
5465            let doy = doe - (365 * yoe + yoe / 4 - yoe / 100);
5466            let mp = (5 * doy + 2) / 153;
5467            let d = doy - (153 * mp + 2) / 5 + 1;
5468            let m = if mp < 10 { mp + 3 } else { mp - 9 };
5469            let y = if m <= 2 { y + 1 } else { y };
5470            match part {
5471                "year" => Value::Int(y),
5472                "month" => Value::Int(m as i64),
5473                "day" => Value::Int(d as i64),
5474                _ => unreachable!(),
5475            }
5476        }
5477        _ => Value::Empty,
5478    }
5479}
5480
5481/// Evaluate a CAST expression.
5482fn eval_cast(val: Value, target: CastType) -> Value {
5483    match target {
5484        CastType::Int => match val {
5485            Value::Int(n) => Value::Int(n),
5486            Value::Float(f) => Value::Int(f as i64),
5487            Value::Bool(b) => Value::Int(if b { 1 } else { 0 }),
5488            Value::Str(s) => s.parse::<i64>().map(Value::Int).unwrap_or(Value::Empty),
5489            Value::DateTime(m) => Value::Int(m),
5490            _ => Value::Empty,
5491        },
5492        CastType::Float => match val {
5493            Value::Float(f) => Value::Float(f),
5494            Value::Int(n) => Value::Float(n as f64),
5495            Value::Str(s) => s.parse::<f64>().map(Value::Float).unwrap_or(Value::Empty),
5496            Value::Bool(b) => Value::Float(if b { 1.0 } else { 0.0 }),
5497            _ => Value::Empty,
5498        },
5499        CastType::Str => match val {
5500            Value::Str(s) => Value::Str(s),
5501            Value::Int(n) => Value::Str(n.to_string()),
5502            Value::Float(f) => Value::Str(f.to_string()),
5503            Value::Bool(b) => Value::Str(b.to_string()),
5504            Value::DateTime(m) => Value::Str(m.to_string()),
5505            _ => Value::Empty,
5506        },
5507        CastType::Bool => match val {
5508            Value::Bool(b) => Value::Bool(b),
5509            Value::Int(n) => Value::Bool(n != 0),
5510            Value::Str(s) => match s.as_str() {
5511                "true" | "1" | "yes" => Value::Bool(true),
5512                "false" | "0" | "no" => Value::Bool(false),
5513                _ => Value::Empty,
5514            },
5515            _ => Value::Empty,
5516        },
5517        CastType::DateTime => match val {
5518            Value::DateTime(m) => Value::DateTime(m),
5519            Value::Int(m) => Value::DateTime(m),
5520            _ => Value::Empty,
5521        },
5522    }
5523}
5524
5525/// Execute window function computations. Shared by both read and write paths.
5526///
5527/// For each `WindowDef`:
5528///   1. Sort rows by (partition_by keys, order_by keys).
5529///   2. Walk sorted rows, detecting partition boundaries.
5530///   3. Compute the window value per row (running aggregates reset at
5531///      partition boundaries).
5532///   4. Append the computed column to each row and register the column name.
5533///
5534/// All computed columns are appended to the original row data; the
5535/// downstream `Project` node plucks the ones the user asked for.
5536fn execute_window(result: QueryResult, windows: &[WindowDef]) -> Result<QueryResult, String> {
5537    let (mut columns, mut rows) = match result {
5538        QueryResult::Rows { columns, rows } => (columns, rows),
5539        _ => return Err("window function requires row input".into()),
5540    };
5541
5542    for wdef in windows {
5543        // Resolve partition/order column indices against current columns.
5544        let part_indices: Vec<usize> = wdef
5545            .partition_by
5546            .iter()
5547            .map(|name| {
5548                columns
5549                    .iter()
5550                    .position(|c| c == name)
5551                    .ok_or_else(|| format!("window partition column '{name}' not found"))
5552            })
5553            .collect::<Result<Vec<_>, _>>()?;
5554
5555        let ord_indices: Vec<(usize, bool)> = wdef
5556            .order_by
5557            .iter()
5558            .map(|sk| {
5559                columns
5560                    .iter()
5561                    .position(|c| c == &sk.field)
5562                    .map(|i| (i, sk.descending))
5563                    .ok_or_else(|| format!("window order column '{}' not found", sk.field))
5564            })
5565            .collect::<Result<Vec<_>, _>>()?;
5566
5567        // Resolve the argument column index (for aggregate windows).
5568        let arg_col_idx: Option<usize> = if let Some(arg) = wdef.args.first() {
5569            match arg {
5570                Expr::Field(name) => {
5571                    if name == "*" {
5572                        None // count(*) style — no specific column
5573                    } else {
5574                        Some(
5575                            columns
5576                                .iter()
5577                                .position(|c| c == name)
5578                                .ok_or_else(|| format!("window arg column '{name}' not found"))?,
5579                        )
5580                    }
5581                }
5582                _ => None,
5583            }
5584        } else {
5585            None
5586        };
5587
5588        // Build a sort-index to sort rows by partition_by then order_by
5589        // without actually reordering the original Vec (we need original
5590        // order to write results back).
5591        let n = rows.len();
5592        let mut indices: Vec<usize> = (0..n).collect();
5593        indices.sort_by(|&a, &b| {
5594            // Compare partition keys first.
5595            for &pi in &part_indices {
5596                let cmp = rows[a][pi].cmp(&rows[b][pi]);
5597                if cmp != std::cmp::Ordering::Equal {
5598                    return cmp;
5599                }
5600            }
5601            // Then order keys.
5602            for &(oi, desc) in &ord_indices {
5603                let cmp = rows[a][oi].cmp(&rows[b][oi]);
5604                if cmp != std::cmp::Ordering::Equal {
5605                    return if desc { cmp.reverse() } else { cmp };
5606                }
5607            }
5608            std::cmp::Ordering::Equal
5609        });
5610
5611        // Compute window values in sorted order, tracking partition boundaries.
5612        let mut win_values: Vec<Value> = vec![Value::Empty; n];
5613        let mut partition_start = 0usize;
5614        // Running state for aggregate windows:
5615        let mut running_count: i64 = 0;
5616        let mut running_int_sum: i64 = 0;
5617        let mut running_float_sum: f64 = 0.0;
5618        let mut running_saw_float = false;
5619        let mut running_min: Option<Value> = None;
5620        let mut running_max: Option<Value> = None;
5621        let mut rank_counter: i64 = 0;
5622        let mut dense_rank_counter: i64 = 0;
5623        let mut prev_order_key: Option<Vec<Value>> = None;
5624        let mut same_rank_count: i64 = 0;
5625
5626        for sorted_pos in 0..n {
5627            let row_idx = indices[sorted_pos];
5628
5629            // Detect partition boundary.
5630            let new_partition = if sorted_pos == 0 {
5631                true
5632            } else {
5633                let prev_row_idx = indices[sorted_pos - 1];
5634                part_indices
5635                    .iter()
5636                    .any(|&pi| rows[row_idx][pi] != rows[prev_row_idx][pi])
5637            };
5638
5639            if new_partition {
5640                partition_start = sorted_pos;
5641                running_count = 0;
5642                running_int_sum = 0;
5643                running_float_sum = 0.0;
5644                running_saw_float = false;
5645                running_min = None;
5646                running_max = None;
5647                rank_counter = 0;
5648                dense_rank_counter = 0;
5649                prev_order_key = None;
5650                same_rank_count = 0;
5651            }
5652
5653            // Extract current order key for rank tracking.
5654            let current_order_key: Vec<Value> = ord_indices
5655                .iter()
5656                .map(|&(oi, _)| rows[row_idx][oi].clone())
5657                .collect();
5658            let same_as_prev = prev_order_key.as_ref() == Some(&current_order_key);
5659
5660            let value = match wdef.function {
5661                WindowFunc::RowNumber => Value::Int((sorted_pos - partition_start + 1) as i64),
5662                WindowFunc::Rank => {
5663                    if same_as_prev {
5664                        same_rank_count += 1;
5665                    } else {
5666                        rank_counter += same_rank_count + 1;
5667                        same_rank_count = 0;
5668                        if rank_counter == 0 {
5669                            rank_counter = 1;
5670                        }
5671                    }
5672                    Value::Int(rank_counter)
5673                }
5674                WindowFunc::DenseRank => {
5675                    if !same_as_prev {
5676                        dense_rank_counter += 1;
5677                    }
5678                    Value::Int(dense_rank_counter)
5679                }
5680                WindowFunc::Sum => {
5681                    if let Some(ci) = arg_col_idx {
5682                        match &rows[row_idx][ci] {
5683                            Value::Int(v) => running_int_sum += v,
5684                            Value::Float(v) => {
5685                                running_float_sum += v;
5686                                running_saw_float = true;
5687                            }
5688                            _ => {}
5689                        }
5690                    }
5691                    if running_saw_float {
5692                        Value::Float(running_float_sum + running_int_sum as f64)
5693                    } else {
5694                        Value::Int(running_int_sum)
5695                    }
5696                }
5697                WindowFunc::Avg => {
5698                    if let Some(ci) = arg_col_idx {
5699                        match &rows[row_idx][ci] {
5700                            Value::Int(v) => {
5701                                running_float_sum += *v as f64;
5702                                running_count += 1;
5703                            }
5704                            Value::Float(v) => {
5705                                running_float_sum += v;
5706                                running_count += 1;
5707                            }
5708                            _ => {}
5709                        }
5710                    }
5711                    if running_count == 0 {
5712                        Value::Empty
5713                    } else {
5714                        Value::Float(running_float_sum / running_count as f64)
5715                    }
5716                }
5717                WindowFunc::Count => {
5718                    if let Some(ci) = arg_col_idx {
5719                        if !rows[row_idx][ci].is_empty() {
5720                            running_count += 1;
5721                        }
5722                    } else {
5723                        // count(*) — count all rows
5724                        running_count += 1;
5725                    }
5726                    Value::Int(running_count)
5727                }
5728                WindowFunc::Min => {
5729                    if let Some(ci) = arg_col_idx {
5730                        let v = &rows[row_idx][ci];
5731                        if !v.is_empty() {
5732                            running_min = Some(match &running_min {
5733                                None => v.clone(),
5734                                Some(cur) => {
5735                                    if v < cur {
5736                                        v.clone()
5737                                    } else {
5738                                        cur.clone()
5739                                    }
5740                                }
5741                            });
5742                        }
5743                    }
5744                    running_min.clone().unwrap_or(Value::Empty)
5745                }
5746                WindowFunc::Max => {
5747                    if let Some(ci) = arg_col_idx {
5748                        let v = &rows[row_idx][ci];
5749                        if !v.is_empty() {
5750                            running_max = Some(match &running_max {
5751                                None => v.clone(),
5752                                Some(cur) => {
5753                                    if v > cur {
5754                                        v.clone()
5755                                    } else {
5756                                        cur.clone()
5757                                    }
5758                                }
5759                            });
5760                        }
5761                    }
5762                    running_max.clone().unwrap_or(Value::Empty)
5763                }
5764            };
5765
5766            prev_order_key = Some(current_order_key);
5767            win_values[row_idx] = value;
5768        }
5769
5770        // Append the computed window column to each row.
5771        for (ri, row) in rows.iter_mut().enumerate() {
5772            row.push(win_values[ri].clone());
5773        }
5774        columns.push(wdef.output_name.clone());
5775    }
5776
5777    Ok(QueryResult::Rows { columns, rows })
5778}
5779
5780/// Mission E2b: compute one aggregate over a set of rows in a group.
5781fn compute_group_aggregate(
5782    func: AggFunc,
5783    all_rows: &[Vec<Value>],
5784    row_indices: &[usize],
5785    col_idx: usize,
5786) -> Value {
5787    match func {
5788        AggFunc::Count => {
5789            if col_idx == usize::MAX {
5790                // count(*) — count all rows in the group.
5791                return Value::Int(row_indices.len() as i64);
5792            }
5793            let count = row_indices
5794                .iter()
5795                .filter(|&&ri| !all_rows[ri][col_idx].is_empty())
5796                .count();
5797            Value::Int(count as i64)
5798        }
5799        AggFunc::CountDistinct => {
5800            let mut seen = std::collections::HashSet::new();
5801            for &ri in row_indices {
5802                let v = &all_rows[ri][col_idx];
5803                if !v.is_empty() {
5804                    seen.insert(v.clone());
5805                }
5806            }
5807            Value::Int(seen.len() as i64)
5808        }
5809        AggFunc::Sum => {
5810            // Mirror the scalar Sum path: accumulate int and float
5811            // contributions separately and promote the final result to
5812            // Float if any Float row was observed. Prevents silent
5813            // drop of Float columns in GROUP BY aggregates.
5814            let mut int_sum: i64 = 0;
5815            let mut float_sum: f64 = 0.0;
5816            let mut saw_float = false;
5817            for &ri in row_indices {
5818                match &all_rows[ri][col_idx] {
5819                    Value::Int(v) => int_sum += v,
5820                    Value::Float(v) => {
5821                        float_sum += *v;
5822                        saw_float = true;
5823                    }
5824                    _ => {}
5825                }
5826            }
5827            if saw_float {
5828                Value::Float(float_sum + int_sum as f64)
5829            } else {
5830                Value::Int(int_sum)
5831            }
5832        }
5833        AggFunc::Avg => {
5834            let mut sum = 0.0f64;
5835            let mut count = 0usize;
5836            for &ri in row_indices {
5837                match &all_rows[ri][col_idx] {
5838                    Value::Int(v) => {
5839                        sum += *v as f64;
5840                        count += 1;
5841                    }
5842                    Value::Float(v) => {
5843                        sum += *v;
5844                        count += 1;
5845                    }
5846                    _ => {}
5847                }
5848            }
5849            if count == 0 {
5850                Value::Empty
5851            } else {
5852                Value::Float(sum / count as f64)
5853            }
5854        }
5855        AggFunc::Min => row_indices
5856            .iter()
5857            .map(|&ri| &all_rows[ri][col_idx])
5858            .filter(|v| !v.is_empty())
5859            .min()
5860            .cloned()
5861            .unwrap_or(Value::Empty),
5862        AggFunc::Max => row_indices
5863            .iter()
5864            .map(|&ri| &all_rows[ri][col_idx])
5865            .filter(|v| !v.is_empty())
5866            .max()
5867            .cloned()
5868            .unwrap_or(Value::Empty),
5869    }
5870}
5871
5872/// Mission E1.3: try to extract equi-join key indices from a join `on`
5873/// predicate. Returns `Some((left_col_idx, right_col_idx))` when the
5874/// predicate is exactly `L = R` (or `R = L`) and both sides resolve
5875/// cleanly — `L` to the left subtree's column list and `R` to the right
5876/// subtree's column list.
5877///
5878/// This is deliberately narrow. We only recognise the two shapes:
5879///   * `QualifiedField = QualifiedField`  (`u.id = o.user_id`)
5880///   * `Field = Field`                    (`.id = .user_id`, unqualified)
5881///
5882/// Anything else — conjunctions, constants, function calls, or predicates
5883/// that touch the same side on both halves — falls through to the
5884/// nested-loop path unchanged.
5885fn try_extract_equi_join_keys(
5886    pred: &Expr,
5887    left_columns: &[String],
5888    right_columns: &[String],
5889) -> Option<(usize, usize)> {
5890    let (lhs, op, rhs) = match pred {
5891        Expr::BinaryOp(l, op, r) => (l.as_ref(), *op, r.as_ref()),
5892        _ => return None,
5893    };
5894    if op != BinOp::Eq {
5895        return None;
5896    }
5897    // Normal orientation: lhs in left, rhs in right.
5898    if let (Some(li), Some(ri)) = (
5899        resolve_side_column(lhs, left_columns),
5900        resolve_side_column(rhs, right_columns),
5901    ) {
5902        return Some((li, ri));
5903    }
5904    // Swapped: rhs in left, lhs in right. Both sides of `=` are
5905    // commutative so this is safe.
5906    if let (Some(li), Some(ri)) = (
5907        resolve_side_column(rhs, left_columns),
5908        resolve_side_column(lhs, right_columns),
5909    ) {
5910        return Some((li, ri));
5911    }
5912    None
5913}
5914
5915fn resolve_side_column(expr: &Expr, columns: &[String]) -> Option<usize> {
5916    match expr {
5917        Expr::QualifiedField { qualifier, field } => {
5918            // Byte-level match so we don't allocate a fresh `format!` on
5919            // every call — this runs once per plan, so allocation would be
5920            // cheap, but the match is trivial enough to keep inline with
5921            // the eval_expr version.
5922            let q = qualifier.as_bytes();
5923            let f = field.as_bytes();
5924            columns.iter().position(|c| {
5925                let b = c.as_bytes();
5926                b.len() == q.len() + 1 + f.len()
5927                    && b[..q.len()] == *q
5928                    && b[q.len()] == b'.'
5929                    && b[q.len() + 1..] == *f
5930            })
5931        }
5932        Expr::Field(name) => columns.iter().position(|c| c == name),
5933        _ => None,
5934    }
5935}
5936
5937/// Mission E1.3: O(L + R) hash join. Builds a `FxHashMap<Value, Vec<usize>>`
5938/// over the right (inner) side's join keys, then streams the left (outer)
5939/// side and for each probe row emits every combined row whose right-side
5940/// key matches. For `JoinKind::LeftOuter`, unmatched left rows are emitted
5941/// padded with `Value::Empty` on the right side.
5942///
5943/// The right side is always the build side. That choice is forced for
5944/// LeftOuter (the left side must stream so we can detect orphans), and
5945/// for Inner it's a reasonable default — left-deep plans tend to grow the
5946/// left side with each join, so the un-joined right leaf is often the
5947/// smaller of the two at each level.
5948fn hash_join(
5949    left_columns: Vec<String>,
5950    left_rows: Vec<Vec<Value>>,
5951    right_columns: Vec<String>,
5952    right_rows: Vec<Vec<Value>>,
5953    left_key_idx: usize,
5954    right_key_idx: usize,
5955    kind: JoinKind,
5956) -> QueryResult {
5957    use rustc_hash::FxHashMap;
5958
5959    let n_left = left_columns.len();
5960    let n_right = right_columns.len();
5961    let mut columns = Vec::with_capacity(n_left + n_right);
5962    columns.extend(left_columns);
5963    columns.extend(right_columns);
5964
5965    // Build: right_key -> list of right-row indices. Pre-size to the row
5966    // count so the map doesn't rehash mid-build.
5967    let mut build: FxHashMap<Value, Vec<usize>> =
5968        FxHashMap::with_capacity_and_hasher(right_rows.len(), Default::default());
5969    for (i, row) in right_rows.iter().enumerate() {
5970        // Skip Empty keys on the build side — they can never match under
5971        // SQL semantics (NULL ≠ NULL) and would collapse all nullables to
5972        // one bucket.
5973        if matches!(row[right_key_idx], Value::Empty) {
5974            continue;
5975        }
5976        build.entry(row[right_key_idx].clone()).or_default().push(i);
5977    }
5978
5979    // Reasonable starting capacity — inner joins produce ≥ left_rows.len()
5980    // rows in the common 1:1 case, left-outer always emits ≥ left_rows.len().
5981    let mut rows: Vec<Vec<Value>> = Vec::with_capacity(left_rows.len());
5982
5983    for left_row in &left_rows {
5984        let key = &left_row[left_key_idx];
5985        let matched = if matches!(key, Value::Empty) {
5986            None
5987        } else {
5988            build.get(key)
5989        };
5990        match matched {
5991            Some(matches) if !matches.is_empty() => {
5992                for &ri in matches {
5993                    let right_row = &right_rows[ri];
5994                    let mut combined = Vec::with_capacity(n_left + n_right);
5995                    combined.extend_from_slice(left_row);
5996                    combined.extend_from_slice(right_row);
5997                    rows.push(combined);
5998                }
5999            }
6000            _ => {
6001                if matches!(kind, JoinKind::LeftOuter) {
6002                    let mut row = Vec::with_capacity(n_left + n_right);
6003                    row.extend_from_slice(left_row);
6004                    row.resize(n_left + n_right, Value::Empty);
6005                    rows.push(row);
6006                }
6007            }
6008        }
6009    }
6010
6011    QueryResult::Rows { columns, rows }
6012}
6013
6014/// Lower unindexed `RangeScan` nodes to `Filter(SeqScan)` so that all
6015/// downstream fast paths (count, project+limit, sort+limit, agg, update,
6016/// delete) continue to fire.
6017///
6018/// The planner emits `RangeScan` speculatively for every range inequality
6019/// (`.age > 30`) because it has no catalog access. When the column has a
6020/// B-tree index, `RangeScan` is the correct plan. When it doesn't, the
6021/// executor's `RangeScan` fallback materialises every matching row with
6022/// full `decode_row` — bypassing the compiled-predicate fast paths that
6023/// `Filter(SeqScan)` would trigger.
6024///
6025/// This pass runs once per query, before execution.
6026fn lower_unindexed_range_scans(catalog: &Catalog, plan: &PlanNode) -> PlanNode {
6027    match plan {
6028        PlanNode::RangeScan {
6029            table,
6030            column,
6031            start,
6032            end,
6033        } => {
6034            if let Some(tbl) = catalog.get_table(table) {
6035                if tbl.index(column).is_some() {
6036                    return plan.clone();
6037                }
6038            }
6039            let pred = synthesize_range_predicate(column, start, end);
6040            PlanNode::Filter {
6041                input: Box::new(PlanNode::SeqScan {
6042                    table: table.clone(),
6043                }),
6044                predicate: pred,
6045            }
6046        }
6047        PlanNode::Filter { input, predicate } => PlanNode::Filter {
6048            input: Box::new(lower_unindexed_range_scans(catalog, input)),
6049            predicate: predicate.clone(),
6050        },
6051        PlanNode::Project { input, fields } => PlanNode::Project {
6052            input: Box::new(lower_unindexed_range_scans(catalog, input)),
6053            fields: fields.clone(),
6054        },
6055        PlanNode::Sort { input, keys } => PlanNode::Sort {
6056            input: Box::new(lower_unindexed_range_scans(catalog, input)),
6057            keys: keys.clone(),
6058        },
6059        PlanNode::Limit { input, count } => PlanNode::Limit {
6060            input: Box::new(lower_unindexed_range_scans(catalog, input)),
6061            count: count.clone(),
6062        },
6063        PlanNode::Offset { input, count } => PlanNode::Offset {
6064            input: Box::new(lower_unindexed_range_scans(catalog, input)),
6065            count: count.clone(),
6066        },
6067        PlanNode::Aggregate {
6068            input,
6069            function,
6070            field,
6071        } => PlanNode::Aggregate {
6072            input: Box::new(lower_unindexed_range_scans(catalog, input)),
6073            function: *function,
6074            field: field.clone(),
6075        },
6076        PlanNode::Distinct { input } => PlanNode::Distinct {
6077            input: Box::new(lower_unindexed_range_scans(catalog, input)),
6078        },
6079        PlanNode::GroupBy {
6080            input,
6081            keys,
6082            aggregates,
6083            having,
6084        } => PlanNode::GroupBy {
6085            input: Box::new(lower_unindexed_range_scans(catalog, input)),
6086            keys: keys.clone(),
6087            aggregates: aggregates.clone(),
6088            having: having.clone(),
6089        },
6090        PlanNode::Update {
6091            input,
6092            table,
6093            assignments,
6094        } => PlanNode::Update {
6095            input: Box::new(lower_unindexed_range_scans(catalog, input)),
6096            table: table.clone(),
6097            assignments: assignments.clone(),
6098        },
6099        PlanNode::Delete { input, table } => PlanNode::Delete {
6100            input: Box::new(lower_unindexed_range_scans(catalog, input)),
6101            table: table.clone(),
6102        },
6103        PlanNode::Window { input, windows } => PlanNode::Window {
6104            input: Box::new(lower_unindexed_range_scans(catalog, input)),
6105            windows: windows.clone(),
6106        },
6107        PlanNode::Union { left, right, all } => PlanNode::Union {
6108            left: Box::new(lower_unindexed_range_scans(catalog, left)),
6109            right: Box::new(lower_unindexed_range_scans(catalog, right)),
6110            all: *all,
6111        },
6112        PlanNode::Explain { input } => PlanNode::Explain {
6113            input: Box::new(lower_unindexed_range_scans(catalog, input)),
6114        },
6115        PlanNode::NestedLoopJoin {
6116            left,
6117            right,
6118            on,
6119            kind,
6120        } => PlanNode::NestedLoopJoin {
6121            left: Box::new(lower_unindexed_range_scans(catalog, left)),
6122            right: Box::new(lower_unindexed_range_scans(catalog, right)),
6123            on: on.clone(),
6124            kind: *kind,
6125        },
6126        // Leaf nodes: no children to recurse into.
6127        _ => plan.clone(),
6128    }
6129}
6130
6131/// Synthesize a range predicate from RangeScan bounds for the fallback path.
6132fn synthesize_range_predicate(
6133    column: &str,
6134    start: &Option<(Expr, bool)>,
6135    end: &Option<(Expr, bool)>,
6136) -> Expr {
6137    let lower = start.as_ref().map(|(expr, inclusive)| {
6138        let op = if *inclusive { BinOp::Gte } else { BinOp::Gt };
6139        Expr::BinaryOp(
6140            Box::new(Expr::Field(column.to_string())),
6141            op,
6142            Box::new(expr.clone()),
6143        )
6144    });
6145    let upper = end.as_ref().map(|(expr, inclusive)| {
6146        let op = if *inclusive { BinOp::Lte } else { BinOp::Lt };
6147        Expr::BinaryOp(
6148            Box::new(Expr::Field(column.to_string())),
6149            op,
6150            Box::new(expr.clone()),
6151        )
6152    });
6153    match (lower, upper) {
6154        (Some(l), Some(u)) => Expr::BinaryOp(Box::new(l), BinOp::And, Box::new(u)),
6155        (Some(l), None) => l,
6156        (None, Some(u)) => u,
6157        (None, None) => Expr::Literal(Literal::Bool(true)),
6158    }
6159}
6160
6161/// Check if a value falls within a range (used in last-resort decoded-row eval).
6162fn range_matches(
6163    val: &Value,
6164    start: &Option<Value>,
6165    start_inc: bool,
6166    end: &Option<Value>,
6167    end_inc: bool,
6168) -> bool {
6169    if let Some(ref s) = start {
6170        if start_inc {
6171            if val < s {
6172                return false;
6173            }
6174        } else if val <= s {
6175            return false;
6176        }
6177    }
6178    if let Some(ref e) = end {
6179        if end_inc {
6180            if val > e {
6181                return false;
6182            }
6183        } else if val >= e {
6184            return false;
6185        }
6186    }
6187    true
6188}
6189
6190/// Format a `PlanNode` tree as a human-readable, indented text
6191/// representation. Used by the `EXPLAIN` command.
6192fn format_plan_tree(plan: &PlanNode, depth: usize) -> String {
6193    let indent = "  ".repeat(depth);
6194    match plan {
6195        PlanNode::SeqScan { table } => format!("{indent}SeqScan table={table}"),
6196        PlanNode::AliasScan { table, alias } => {
6197            format!("{indent}AliasScan table={table} alias={alias}")
6198        }
6199        PlanNode::IndexScan { table, column, key } => {
6200            format!("{indent}IndexScan table={table} column={column} key={key:?}")
6201        }
6202        PlanNode::RangeScan {
6203            table,
6204            column,
6205            start,
6206            end,
6207        } => {
6208            let s = match start {
6209                Some((expr, inc)) => {
6210                    let op = if *inc { ">=" } else { ">" };
6211                    format!("{op}{expr:?}")
6212                }
6213                None => "unbounded".to_string(),
6214            };
6215            let e = match end {
6216                Some((expr, inc)) => {
6217                    let op = if *inc { "<=" } else { "<" };
6218                    format!("{op}{expr:?}")
6219                }
6220                None => "unbounded".to_string(),
6221            };
6222            format!("{indent}RangeScan table={table} column={column} [{s}, {e}]")
6223        }
6224        PlanNode::Filter { input, predicate } => {
6225            let child = format_plan_tree(input, depth + 1);
6226            format!("{indent}Filter predicate={predicate:?}\n{child}")
6227        }
6228        PlanNode::Project { input, fields } => {
6229            let names: Vec<String> = fields
6230                .iter()
6231                .map(|f| match &f.alias {
6232                    Some(a) => format!("{a}: {:?}", f.expr),
6233                    None => format!("{:?}", f.expr),
6234                })
6235                .collect();
6236            let child = format_plan_tree(input, depth + 1);
6237            format!("{indent}Project fields=[{}]\n{child}", names.join(", "))
6238        }
6239        PlanNode::Sort { input, keys } => {
6240            let ks: Vec<String> = keys
6241                .iter()
6242                .map(|k| {
6243                    if k.descending {
6244                        format!("{} desc", k.field)
6245                    } else {
6246                        k.field.clone()
6247                    }
6248                })
6249                .collect();
6250            let child = format_plan_tree(input, depth + 1);
6251            format!("{indent}Sort keys=[{}]\n{child}", ks.join(", "))
6252        }
6253        PlanNode::Limit { input, count } => {
6254            let child = format_plan_tree(input, depth + 1);
6255            format!("{indent}Limit count={count:?}\n{child}")
6256        }
6257        PlanNode::Offset { input, count } => {
6258            let child = format_plan_tree(input, depth + 1);
6259            format!("{indent}Offset count={count:?}\n{child}")
6260        }
6261        PlanNode::Aggregate {
6262            input,
6263            function,
6264            field,
6265        } => {
6266            let f = field.as_deref().unwrap_or("*");
6267            let child = format_plan_tree(input, depth + 1);
6268            format!("{indent}Aggregate fn={function:?} field={f}\n{child}")
6269        }
6270        PlanNode::NestedLoopJoin {
6271            left,
6272            right,
6273            on,
6274            kind,
6275        } => {
6276            let left_child = format_plan_tree(left, depth + 1);
6277            let right_child = format_plan_tree(right, depth + 1);
6278            let on_str = match on {
6279                Some(pred) => format!("{pred:?}"),
6280                None => "none".to_string(),
6281            };
6282            format!("{indent}NestedLoopJoin kind={kind:?} on={on_str}\n{left_child}\n{right_child}")
6283        }
6284        PlanNode::Distinct { input } => {
6285            let child = format_plan_tree(input, depth + 1);
6286            format!("{indent}Distinct\n{child}")
6287        }
6288        PlanNode::GroupBy {
6289            input,
6290            keys,
6291            aggregates,
6292            having,
6293        } => {
6294            let agg_strs: Vec<String> = aggregates
6295                .iter()
6296                .map(|a| format!("{:?}({}) as {}", a.function, a.field, a.output_name))
6297                .collect();
6298            let having_str = match having {
6299                Some(h) => format!(" having={h:?}"),
6300                None => String::new(),
6301            };
6302            let child = format_plan_tree(input, depth + 1);
6303            format!(
6304                "{indent}GroupBy keys=[{}] aggs=[{}]{having_str}\n{child}",
6305                keys.join(", "),
6306                agg_strs.join(", "),
6307            )
6308        }
6309        PlanNode::Insert { table, assignments } => {
6310            let cols: Vec<&str> = assignments.iter().map(|a| a.field.as_str()).collect();
6311            format!("{indent}Insert table={table} cols=[{}]", cols.join(", "))
6312        }
6313        PlanNode::Upsert {
6314            table,
6315            key_column,
6316            assignments,
6317            on_conflict,
6318        } => {
6319            let cols: Vec<&str> = assignments.iter().map(|a| a.field.as_str()).collect();
6320            let conflict_cols: Vec<&str> = on_conflict.iter().map(|a| a.field.as_str()).collect();
6321            if conflict_cols.is_empty() {
6322                format!(
6323                    "{indent}Upsert table={table} key={key_column} cols=[{}]",
6324                    cols.join(", ")
6325                )
6326            } else {
6327                format!(
6328                    "{indent}Upsert table={table} key={key_column} cols=[{}] on_conflict=[{}]",
6329                    cols.join(", "),
6330                    conflict_cols.join(", ")
6331                )
6332            }
6333        }
6334        PlanNode::Update {
6335            input,
6336            table,
6337            assignments,
6338        } => {
6339            let cols: Vec<&str> = assignments.iter().map(|a| a.field.as_str()).collect();
6340            let child = format_plan_tree(input, depth + 1);
6341            format!(
6342                "{indent}Update table={table} set=[{}]\n{child}",
6343                cols.join(", ")
6344            )
6345        }
6346        PlanNode::Delete { input, table } => {
6347            let child = format_plan_tree(input, depth + 1);
6348            format!("{indent}Delete table={table}\n{child}")
6349        }
6350        PlanNode::CreateTable { name, fields } => {
6351            let fs: Vec<String> = fields
6352                .iter()
6353                .map(|(n, t, r)| {
6354                    if *r {
6355                        format!("{n}: {t} required")
6356                    } else {
6357                        format!("{n}: {t}")
6358                    }
6359                })
6360                .collect();
6361            format!("{indent}CreateTable name={name} fields=[{}]", fs.join(", "))
6362        }
6363        PlanNode::AlterTable { table, action } => {
6364            format!("{indent}AlterTable table={table} action={action:?}")
6365        }
6366        PlanNode::DropTable { name } => format!("{indent}DropTable name={name}"),
6367        PlanNode::CreateView { name, .. } => format!("{indent}CreateView name={name}"),
6368        PlanNode::RefreshView { name } => format!("{indent}RefreshView name={name}"),
6369        PlanNode::DropView { name } => format!("{indent}DropView name={name}"),
6370        PlanNode::Window { input, windows } => {
6371            let ws: Vec<String> = windows
6372                .iter()
6373                .map(|w| format!("{:?} as {}", w.function, w.output_name))
6374                .collect();
6375            let child = format_plan_tree(input, depth + 1);
6376            format!("{indent}Window fns=[{}]\n{child}", ws.join(", "))
6377        }
6378        PlanNode::Union { left, right, all } => {
6379            let kind = if *all { "UNION ALL" } else { "UNION" };
6380            let left_child = format_plan_tree(left, depth + 1);
6381            let right_child = format_plan_tree(right, depth + 1);
6382            format!("{indent}{kind}\n{left_child}\n{right_child}")
6383        }
6384        PlanNode::Explain { input } => {
6385            let child = format_plan_tree(input, depth + 1);
6386            format!("{indent}Explain\n{child}")
6387        }
6388    }
6389}
6390
/// Byte-layout facts about an encoded row, computed locally in the
/// executor so compiled predicates and column readers do not have to reach
/// into the storage crate's private `RowLayout` internals.
///
/// Encoded row format:
///   [length: u16][null_bitmap][fixed cols packed][var offset table: (n_var+1) u16s][var data]
struct FastLayout {
    /// Length of the null bitmap, in bytes.
    bitmap_size: usize,
    /// Per column: byte offset inside the fixed region, or `None` when the
    /// column is variable-length.
    fixed_offsets: Vec<Option<usize>>,
    /// Total byte length of the packed fixed-size columns.
    fixed_region_size: usize,
    /// Per column: slot number in the var-offset table, or `None` when the
    /// column is fixed-size.
    var_indices: Vec<Option<usize>>,
    /// How many columns are variable-length.
    n_var: usize,
}
6409
6410impl FastLayout {
6411    fn new(schema: &Schema) -> Self {
6412        let n_cols = schema.columns.len();
6413        let bitmap_size = n_cols.div_ceil(8);
6414        let mut fixed_offsets = vec![None; n_cols];
6415        let mut var_indices = vec![None; n_cols];
6416        let mut fixed_pos: usize = 0;
6417        let mut var_count: usize = 0;
6418
6419        for (i, col) in schema.columns.iter().enumerate() {
6420            if is_fixed_size(col.type_id) {
6421                fixed_offsets[i] = Some(fixed_pos);
6422                fixed_pos += fixed_size(col.type_id).unwrap();
6423            } else {
6424                var_indices[i] = Some(var_count);
6425                var_count += 1;
6426            }
6427        }
6428
6429        FastLayout {
6430            bitmap_size,
6431            fixed_offsets,
6432            fixed_region_size: fixed_pos,
6433            var_indices,
6434            n_var: var_count,
6435        }
6436    }
6437
6438    /// Where the var-offset table starts within `data`.
6439    #[inline]
6440    fn var_offset_table_start(&self) -> usize {
6441        2 + self.bitmap_size + self.fixed_region_size
6442    }
6443
6444    /// Where the var-data region starts within `data`.
6445    #[inline]
6446    fn var_data_start(&self) -> usize {
6447        self.var_offset_table_start() + (self.n_var + 1) * 2
6448    }
6449}
6450
/// A boxed predicate evaluated directly against a row's raw encoded bytes;
/// returns `true` when the row satisfies the predicate.
type CompiledPredicate = Box<dyn Fn(&[u8]) -> bool>;
6452
/// Map an f64 bit pattern to a u64 whose unsigned integer order matches the
/// order `f64::total_cmp` gives the floats. This is the classic
/// sortable-float transform:
///   - Sign bit clear (positive): flip only the sign bit. This maps
///     [+0, +∞, +NaN] to [0x8000…, 0xFFF0…, 0xFFF8…] — increasing as u64.
///   - Sign bit set (negative): flip every bit. This maps
///     [-∞, -0] to [0x000F…, 0x7FFF…] — increasing as u64, and placed
///     *below* the positive range so negatives < positives.
///
/// Used by Mission D10 Float fast paths so heaps can be keyed on plain
/// `u64` instead of a `TotalF64` newtype whose `Ord::cmp` calls
/// `total_cmp`.
#[inline]
fn f64_bits_to_sortable_u64(bits: u64) -> u64 {
    // Arithmetic shift smears the sign bit across the word: all ones for a
    // negative float, all zeros for a positive one.
    let sign_fill = ((bits as i64) >> 63) as u64;
    // OR-ing in the top bit yields the xor mask for either case: every bit
    // for negatives, only the sign bit for positives.
    bits ^ (sign_fill | 0x8000_0000_0000_0000)
}
6476
6477/// A single flattened predicate leaf — pure data, no closures, no allocation
6478/// per call. Mission D3: replaces recursive Box<dyn Fn> conjunctions with a
6479/// `Vec<CompiledLeaf>` so the inner scan loop becomes a tight match instead
6480/// of N+1 vtable indirect calls per row.
6481enum CompiledLeaf {
6482    /// `.field <op> literal_int` (or reversed)
6483    Int {
6484        data_offset: usize,
6485        bitmap_byte: usize,
6486        bitmap_bit: u8,
6487        op: BinOp,
6488        literal: i64,
6489    },
6490    /// `.field <op> literal_float` (or reversed), where `.field` is a
6491    /// Float column. Int literals that bound a Float column (e.g.
6492    /// `.price > 100` on `price: float`) are also routed here, promoted
6493    /// to `f64` at compile time so the hot loop only sees one shape.
6494    /// Comparisons use `f64::total_cmp` so NaN handling is deterministic
6495    /// and consistent with `Value::Ord` across every read path.
6496    Float {
6497        data_offset: usize,
6498        bitmap_byte: usize,
6499        bitmap_bit: u8,
6500        op: BinOp,
6501        literal: f64,
6502    },
6503    /// `.field is null` or `.field is not null`
6504    IsNull {
6505        bitmap_byte: usize,
6506        bitmap_bit: u8,
6507        want_null: bool,
6508    },
6509    /// `.field = string_literal` or `.field != string_literal`
6510    StrEq {
6511        var_offset_table_start: usize,
6512        var_data_start: usize,
6513        var_idx: usize,
6514        bitmap_byte: usize,
6515        bitmap_bit: u8,
6516        negate: bool,
6517        needle: Vec<u8>,
6518    },
6519}
6520
6521impl CompiledLeaf {
6522    /// Evaluate this leaf against a row's raw bytes. `#[inline]` so the
6523    /// match folds into the caller's tight loop with LTO.
6524    #[inline]
6525    fn eval(&self, data: &[u8]) -> bool {
6526        match self {
6527            CompiledLeaf::Int {
6528                data_offset,
6529                bitmap_byte,
6530                bitmap_bit,
6531                op,
6532                literal,
6533            } => {
6534                let is_null = (data[2 + bitmap_byte] >> bitmap_bit) & 1 == 1;
6535                if is_null {
6536                    return false;
6537                }
6538                let val =
6539                    i64::from_le_bytes(data[*data_offset..*data_offset + 8].try_into().unwrap());
6540                match op {
6541                    BinOp::Eq => val == *literal,
6542                    BinOp::Neq => val != *literal,
6543                    BinOp::Lt => val < *literal,
6544                    BinOp::Gt => val > *literal,
6545                    BinOp::Lte => val <= *literal,
6546                    BinOp::Gte => val >= *literal,
6547                    _ => false,
6548                }
6549            }
6550            CompiledLeaf::Float {
6551                data_offset,
6552                bitmap_byte,
6553                bitmap_bit,
6554                op,
6555                literal,
6556            } => {
6557                let is_null = (data[2 + bitmap_byte] >> bitmap_bit) & 1 == 1;
6558                if is_null {
6559                    return false;
6560                }
6561                let val =
6562                    f64::from_le_bytes(data[*data_offset..*data_offset + 8].try_into().unwrap());
6563                // `total_cmp` matches Value::Ord: NaN > everything,
6564                // -0.0 < +0.0, finite order as expected. Keeps compiled
6565                // WHERE identical in semantics to the generic row-decode
6566                // path (which calls Value::cmp directly).
6567                let ord = val.total_cmp(literal);
6568                match op {
6569                    BinOp::Eq => ord.is_eq(),
6570                    BinOp::Neq => !ord.is_eq(),
6571                    BinOp::Lt => ord.is_lt(),
6572                    BinOp::Gt => ord.is_gt(),
6573                    BinOp::Lte => !ord.is_gt(),
6574                    BinOp::Gte => !ord.is_lt(),
6575                    _ => false,
6576                }
6577            }
6578            CompiledLeaf::IsNull {
6579                bitmap_byte,
6580                bitmap_bit,
6581                want_null,
6582            } => {
6583                let is_null = (data[2 + bitmap_byte] >> bitmap_bit) & 1 == 1;
6584                if *want_null {
6585                    is_null
6586                } else {
6587                    !is_null
6588                }
6589            }
6590            CompiledLeaf::StrEq {
6591                var_offset_table_start,
6592                var_data_start,
6593                var_idx,
6594                bitmap_byte,
6595                bitmap_bit,
6596                negate,
6597                needle,
6598            } => {
6599                let is_null = (data[2 + bitmap_byte] >> bitmap_bit) & 1 == 1;
6600                if is_null {
6601                    return false;
6602                }
6603                let off_pos = var_offset_table_start + var_idx * 2;
6604                let next_pos = var_offset_table_start + (var_idx + 1) * 2;
6605                let start =
6606                    u16::from_le_bytes(data[off_pos..off_pos + 2].try_into().unwrap()) as usize;
6607                let end =
6608                    u16::from_le_bytes(data[next_pos..next_pos + 2].try_into().unwrap()) as usize;
6609                let slice = &data[var_data_start + start..var_data_start + end];
6610                let eq = slice == needle.as_slice();
6611                if *negate {
6612                    !eq
6613                } else {
6614                    eq
6615                }
6616            }
6617        }
6618    }
6619}
6620
6621/// Attempt to compile a predicate expression into a closure over raw row
6622/// bytes. Returns None if the predicate contains shapes we don't handle
6623/// (arithmetic, Or, Coalesce, non-literal comparands, etc.). Supported:
6624///   - `.field <op> literal_int` and its reversed form
6625///   - `.field = string_literal` / `string_literal = .field`
6626///   - `And` conjunctions of any number of the above
6627///
6628/// Mission D3: AND chains are flattened into a single `Vec<CompiledLeaf>`
6629/// closed over by ONE outer closure. The previous implementation built a
6630/// recursive `Box<Fn>` per AND combinator, costing N+1 indirect vtable
6631/// calls per row for an N-leaf conjunction. The flat version dispatches
6632/// each leaf via match (predictable branch, fully inlinable with LTO),
6633/// short-circuiting on the first failing leaf.
6634fn compile_predicate(
6635    expr: &Expr,
6636    columns: &[String],
6637    layout: &FastLayout,
6638    schema: &Schema,
6639) -> Option<CompiledPredicate> {
6640    let mut leaves: Vec<CompiledLeaf> = Vec::new();
6641    flatten_and_compile(expr, columns, layout, schema, &mut leaves)?;
6642    if leaves.is_empty() {
6643        return None;
6644    }
6645    if leaves.len() == 1 {
6646        // Single-leaf fast path: skip the Vec iteration entirely.
6647        let leaf = leaves.into_iter().next().unwrap();
6648        return Some(Box::new(move |data: &[u8]| leaf.eval(data)));
6649    }
6650    Some(Box::new(move |data: &[u8]| {
6651        // Tight short-circuit AND loop. With CompiledLeaf::eval marked
6652        // #[inline], LTO can fold the match arms into this loop body.
6653        for leaf in &leaves {
6654            if !leaf.eval(data) {
6655                return false;
6656            }
6657        }
6658        true
6659    }))
6660}
6661
6662/// Recursively walk an AND chain and push each leaf into `out`. Returns
6663/// `None` if any sub-expression isn't a supported leaf shape.
6664fn flatten_and_compile(
6665    expr: &Expr,
6666    columns: &[String],
6667    layout: &FastLayout,
6668    schema: &Schema,
6669    out: &mut Vec<CompiledLeaf>,
6670) -> Option<()> {
6671    match expr {
6672        Expr::BinaryOp(left, BinOp::And, right) => {
6673            flatten_and_compile(left, columns, layout, schema, out)?;
6674            flatten_and_compile(right, columns, layout, schema, out)?;
6675            Some(())
6676        }
6677        Expr::BinaryOp(left, op, right) => {
6678            if let Some(leaf) = build_int_leaf(left, *op, right, columns, layout, schema) {
6679                out.push(leaf);
6680                return Some(());
6681            }
6682            if let Some(leaf) = build_float_leaf(left, *op, right, columns, layout, schema) {
6683                out.push(leaf);
6684                return Some(());
6685            }
6686            if let Some(leaf) = build_str_eq_leaf(left, *op, right, columns, layout, schema) {
6687                out.push(leaf);
6688                return Some(());
6689            }
6690            None
6691        }
6692        Expr::UnaryOp(op, inner) if *op == UnaryOp::IsNull || *op == UnaryOp::IsNotNull => {
6693            if let Expr::Field(name) = inner.as_ref() {
6694                let col_idx = columns.iter().position(|c| c == name)?;
6695                let bitmap_byte = col_idx / 8;
6696                let bitmap_bit = (col_idx % 8) as u8;
6697                let want_null = *op == UnaryOp::IsNull;
6698                out.push(CompiledLeaf::IsNull {
6699                    bitmap_byte,
6700                    bitmap_bit,
6701                    want_null,
6702                });
6703                Some(())
6704            } else {
6705                None
6706            }
6707        }
6708        _ => None,
6709    }
6710}
6711
6712/// Build an `Int` leaf from `.field <op> literal_int` (or reversed).
6713///
6714/// Only fires for columns whose declared type is `TypeId::Int`. If the
6715/// column is a different numeric type (Float, DateTime) we return `None`
6716/// so the caller falls back to the generic `Value::cmp` evaluation path,
6717/// which correctly handles cross-type numeric comparison (e.g. Int literal
6718/// vs Float column in `BETWEEN 100 AND 500` on a `price: float` column).
6719/// Previously this function read 8 bytes of a Float column as little-endian
6720/// i64, producing nonsense comparisons.
6721fn build_int_leaf(
6722    left: &Expr,
6723    op: BinOp,
6724    right: &Expr,
6725    columns: &[String],
6726    layout: &FastLayout,
6727    schema: &Schema,
6728) -> Option<CompiledLeaf> {
6729    let (field_name, literal_val, op) = match (left, right) {
6730        (Expr::Field(name), Expr::Literal(Literal::Int(v))) => (name, *v, op),
6731        (Expr::Literal(Literal::Int(v)), Expr::Field(name)) => {
6732            let flipped = match op {
6733                BinOp::Lt => BinOp::Gt,
6734                BinOp::Gt => BinOp::Lt,
6735                BinOp::Lte => BinOp::Gte,
6736                BinOp::Gte => BinOp::Lte,
6737                other => other, // Eq, Neq are symmetric
6738            };
6739            (name, *v, flipped)
6740        }
6741        _ => return None,
6742    };
6743
6744    let col_idx = columns.iter().position(|c| c == field_name)?;
6745    // Guard: the compiled Int leaf reads the column's 8 bytes as i64.
6746    // Only valid when the column is actually an Int column.
6747    if schema.columns[col_idx].type_id != TypeId::Int {
6748        return None;
6749    }
6750    let byte_offset = layout.fixed_offsets[col_idx]?;
6751    let bitmap_byte = col_idx / 8;
6752    let bitmap_bit = (col_idx % 8) as u8;
6753    let data_offset = 2 + layout.bitmap_size + byte_offset;
6754
6755    Some(CompiledLeaf::Int {
6756        data_offset,
6757        bitmap_byte,
6758        bitmap_bit,
6759        op,
6760        literal: literal_val,
6761    })
6762}
6763
6764/// Build a `Float` leaf from `.field <op> literal` where `.field` is a
6765/// Float column and `literal` is numeric (Float or Int — Int literals are
6766/// promoted to `f64` at compile time so the hot loop only sees one shape).
6767///
6768/// Mission D10: adds the Float fast-path counterpart to `build_int_leaf`.
6769/// Without this, `WHERE .price > 100.0` on a `price: float` column falls
6770/// through `compile_predicate`, forcing the whole query to the generic
6771/// `decode_row → Value::cmp` path which allocates a `Vec<Value>` per row.
6772fn build_float_leaf(
6773    left: &Expr,
6774    op: BinOp,
6775    right: &Expr,
6776    columns: &[String],
6777    layout: &FastLayout,
6778    schema: &Schema,
6779) -> Option<CompiledLeaf> {
6780    // Accept either direction: field-op-literal or literal-op-field.
6781    // When the literal is on the left, flip the operator so the hot-loop
6782    // eval can assume the field is always the LHS.
6783    let (field_name, literal_val, op) = match (left, right) {
6784        (Expr::Field(name), Expr::Literal(Literal::Float(v))) => (name, *v, op),
6785        (Expr::Field(name), Expr::Literal(Literal::Int(v))) => (name, *v as f64, op),
6786        (Expr::Literal(Literal::Float(v)), Expr::Field(name)) => {
6787            let flipped = match op {
6788                BinOp::Lt => BinOp::Gt,
6789                BinOp::Gt => BinOp::Lt,
6790                BinOp::Lte => BinOp::Gte,
6791                BinOp::Gte => BinOp::Lte,
6792                other => other,
6793            };
6794            (name, *v, flipped)
6795        }
6796        (Expr::Literal(Literal::Int(v)), Expr::Field(name)) => {
6797            let flipped = match op {
6798                BinOp::Lt => BinOp::Gt,
6799                BinOp::Gt => BinOp::Lt,
6800                BinOp::Lte => BinOp::Gte,
6801                BinOp::Gte => BinOp::Lte,
6802                other => other,
6803            };
6804            (name, *v as f64, flipped)
6805        }
6806        _ => return None,
6807    };
6808
6809    let col_idx = columns.iter().position(|c| c == field_name)?;
6810    // Symmetric guard to build_int_leaf: only fire on Float columns. If
6811    // the column is Int but the literal was Float, we want the generic
6812    // path (which promotes Int → f64 via Value::cmp) — compiling a
6813    // Float leaf would read the i64 bytes as f64 and produce nonsense.
6814    if schema.columns[col_idx].type_id != TypeId::Float {
6815        return None;
6816    }
6817    let byte_offset = layout.fixed_offsets[col_idx]?;
6818    let bitmap_byte = col_idx / 8;
6819    let bitmap_bit = (col_idx % 8) as u8;
6820    let data_offset = 2 + layout.bitmap_size + byte_offset;
6821
6822    Some(CompiledLeaf::Float {
6823        data_offset,
6824        bitmap_byte,
6825        bitmap_bit,
6826        op,
6827        literal: literal_val,
6828    })
6829}
6830
6831/// Build a `StrEq` leaf from `.field = string_literal` (or reversed).
6832fn build_str_eq_leaf(
6833    left: &Expr,
6834    op: BinOp,
6835    right: &Expr,
6836    columns: &[String],
6837    layout: &FastLayout,
6838    schema: &Schema,
6839) -> Option<CompiledLeaf> {
6840    if op != BinOp::Eq && op != BinOp::Neq {
6841        return None;
6842    }
6843    let (field_name, literal_str) = match (left, right) {
6844        (Expr::Field(name), Expr::Literal(Literal::String(s))) => (name, s.clone()),
6845        (Expr::Literal(Literal::String(s)), Expr::Field(name)) => (name, s.clone()),
6846        _ => return None,
6847    };
6848
6849    let col_idx = columns.iter().position(|c| c == field_name)?;
6850    if schema.columns[col_idx].type_id != TypeId::Str {
6851        return None;
6852    }
6853    let var_idx = layout.var_indices[col_idx]?;
6854    let var_offset_table_start = layout.var_offset_table_start();
6855    let var_data_start = layout.var_data_start();
6856    let bitmap_byte = col_idx / 8;
6857    let bitmap_bit = (col_idx % 8) as u8;
6858    let negate = op == BinOp::Neq;
6859
6860    Some(CompiledLeaf::StrEq {
6861        var_offset_table_start,
6862        var_data_start,
6863        var_idx,
6864        bitmap_byte,
6865        bitmap_bit,
6866        negate,
6867        needle: literal_str.into_bytes(),
6868    })
6869}
6870
6871/// Collect the column indices referenced by a predicate expression.
6872fn predicate_column_indices(expr: &Expr, columns: &[String]) -> Vec<usize> {
6873    let mut indices = Vec::new();
6874    collect_field_indices(expr, columns, &mut indices);
6875    indices.sort_unstable();
6876    indices.dedup();
6877    indices
6878}
6879
6880fn collect_field_indices(expr: &Expr, columns: &[String], out: &mut Vec<usize>) {
6881    match expr {
6882        Expr::Field(name) => {
6883            if let Some(idx) = columns.iter().position(|c| c == name) {
6884                out.push(idx);
6885            }
6886        }
6887        Expr::BinaryOp(left, _, right) => {
6888            collect_field_indices(left, columns, out);
6889            collect_field_indices(right, columns, out);
6890        }
6891        Expr::Coalesce(left, right) => {
6892            collect_field_indices(left, columns, out);
6893            collect_field_indices(right, columns, out);
6894        }
6895        Expr::UnaryOp(_, inner) => {
6896            collect_field_indices(inner, columns, out);
6897        }
6898        Expr::FunctionCall(_, inner) => {
6899            collect_field_indices(inner, columns, out);
6900        }
6901        Expr::ScalarFunc(_, args) => {
6902            for arg in args {
6903                collect_field_indices(arg, columns, out);
6904            }
6905        }
6906        Expr::Cast(inner, _) => {
6907            collect_field_indices(inner, columns, out);
6908        }
6909        Expr::Case { whens, else_expr } => {
6910            for (cond, result) in whens {
6911                collect_field_indices(cond, columns, out);
6912                collect_field_indices(result, columns, out);
6913            }
6914            if let Some(e) = else_expr {
6915                collect_field_indices(e, columns, out);
6916            }
6917        }
6918        Expr::InList { expr, list, .. } => {
6919            collect_field_indices(expr, columns, out);
6920            for item in list {
6921                collect_field_indices(item, columns, out);
6922            }
6923        }
6924        Expr::InSubquery { expr, .. } => {
6925            collect_field_indices(expr, columns, out);
6926        }
6927        _ => {}
6928    }
6929}
6930
6931/// Decode only the specified columns from raw row bytes, filling the rest
6932/// with `Value::Empty`. This avoids heap allocations for String/Bytes
6933/// columns that the predicate doesn't reference.
6934fn decode_selective(
6935    schema: &Schema,
6936    layout: &RowLayout,
6937    data: &[u8],
6938    col_indices: &[usize],
6939) -> Vec<Value> {
6940    let n_cols = schema.columns.len();
6941    let mut values = vec![Value::Empty; n_cols];
6942    for &ci in col_indices {
6943        values[ci] = decode_column(schema, layout, data, ci);
6944    }
6945    values
6946}
6947
6948fn eval_binop(left: &Value, op: BinOp, right: &Value) -> Value {
6949    match op {
6950        BinOp::Eq => Value::Bool(left == right),
6951        BinOp::Neq => Value::Bool(left != right),
6952        BinOp::Lt => Value::Bool(left < right),
6953        BinOp::Gt => Value::Bool(left > right),
6954        BinOp::Lte => Value::Bool(left <= right),
6955        BinOp::Gte => Value::Bool(left >= right),
6956        BinOp::And => match (left, right) {
6957            (Value::Bool(a), Value::Bool(b)) => Value::Bool(*a && *b),
6958            _ => Value::Bool(false),
6959        },
6960        BinOp::Or => match (left, right) {
6961            (Value::Bool(a), Value::Bool(b)) => Value::Bool(*a || *b),
6962            _ => Value::Bool(false),
6963        },
6964        BinOp::Add => match (left, right) {
6965            (Value::Int(a), Value::Int(b)) => Value::Int(a.saturating_add(*b)),
6966            (Value::Float(a), Value::Float(b)) => Value::Float(a + b),
6967            (Value::Int(a), Value::Float(b)) => Value::Float(*a as f64 + b),
6968            (Value::Float(a), Value::Int(b)) => Value::Float(a + *b as f64),
6969            _ => Value::Empty,
6970        },
6971        BinOp::Sub => match (left, right) {
6972            (Value::Int(a), Value::Int(b)) => Value::Int(a.saturating_sub(*b)),
6973            (Value::Float(a), Value::Float(b)) => Value::Float(a - b),
6974            (Value::Int(a), Value::Float(b)) => Value::Float(*a as f64 - b),
6975            (Value::Float(a), Value::Int(b)) => Value::Float(a - *b as f64),
6976            _ => Value::Empty,
6977        },
6978        BinOp::Mul => match (left, right) {
6979            (Value::Int(a), Value::Int(b)) => Value::Int(a.saturating_mul(*b)),
6980            (Value::Float(a), Value::Float(b)) => Value::Float(a * b),
6981            (Value::Int(a), Value::Float(b)) => Value::Float(*a as f64 * b),
6982            (Value::Float(a), Value::Int(b)) => Value::Float(a * *b as f64),
6983            _ => Value::Empty,
6984        },
6985        BinOp::Div => match (left, right) {
6986            (Value::Int(a), Value::Int(b)) if *b != 0 => Value::Int(a / b),
6987            (Value::Float(a), Value::Float(b)) => Value::Float(a / b),
6988            (Value::Int(a), Value::Float(b)) => Value::Float(*a as f64 / b),
6989            (Value::Float(a), Value::Int(b)) => Value::Float(a / *b as f64),
6990            _ => Value::Empty,
6991        },
6992        BinOp::Like => match (left, right) {
6993            (Value::Str(text), Value::Str(pattern)) => Value::Bool(like_match(text, pattern)),
6994            _ => Value::Bool(false),
6995        },
6996    }
6997}
6998
/// SQL LIKE pattern match.
///
/// `%` matches any sequence of characters (including the empty one) and
/// `_` matches exactly one character; every other pattern character must
/// match itself. There is no escape character for now, so a literal `%`
/// or `_` cannot be expressed. Matching is done per Unicode scalar value
/// (`char`), not per byte.
fn like_match(text: &str, pattern: &str) -> bool {
    let t: Vec<char> = text.chars().collect();
    let p: Vec<char> = pattern.chars().collect();
    like_dp(&t, &p, 0, 0)
}

/// Match the text suffix `t[ti..]` against the pattern suffix `p[pi..]`.
///
/// Iterative greedy matcher with a single backtrack point. When a `%` is
/// seen we remember where the pattern resumes after it and how much text
/// it has absorbed so far. On a later mismatch the most recent `%`
/// absorbs one more character and matching restarts from there. Earlier
/// `%`s never need revisiting: whatever they could absorb, the latest one
/// can absorb as well. Worst-case cost is O(len(t) · len(p)) with no
/// recursion, where naive recursive backtracking is exponential on
/// patterns such as `%a%a%a%b`.
fn like_dp(t: &[char], p: &[char], ti: usize, pi: usize) -> bool {
    if ti > t.len() {
        return false;
    }
    let (mut ti, mut pi) = (ti, pi);
    // (pattern index just after the latest '%', text index where the part
    // of the pattern after that '%' is currently being tried).
    let mut resume: Option<(usize, usize)> = None;

    while ti < t.len() {
        match p.get(pi) {
            Some('%') => {
                // Start with '%' matching nothing; grow it on demand.
                pi += 1;
                resume = Some((pi, ti));
            }
            Some(&pc) if pc == '_' || pc == t[ti] => {
                ti += 1;
                pi += 1;
            }
            _ => match resume {
                // Mismatch (or pattern exhausted): let the latest '%'
                // swallow one more character and retry from there.
                Some((after_pct, tried_from)) => {
                    let next = tried_from + 1;
                    resume = Some((after_pct, next));
                    pi = after_pct;
                    ti = next;
                }
                None => return false,
            },
        }
    }

    // Text consumed: the rest of the pattern may only be '%'s.
    p.get(pi..).map_or(false, |rest| rest.iter().all(|&c| c == '%'))
}
7030
7031#[cfg(test)]
7032mod tests {
7033    use super::*;
7034    use std::sync::atomic::{AtomicU32, Ordering};
7035
7036    static TEST_COUNTER: AtomicU32 = AtomicU32::new(0);
7037
7038    fn test_engine() -> Engine {
7039        let id = TEST_COUNTER.fetch_add(1, Ordering::SeqCst);
7040        let dir = std::env::temp_dir().join(format!("powdb_exec_{}_{}", std::process::id(), id));
7041        let mut engine = Engine::new(&dir).unwrap();
7042        engine
7043            .execute_powql("type User { required name: str, required email: str, age: int }")
7044            .unwrap();
7045        engine
7046            .execute_powql(r#"insert User { name := "Alice", email := "alice@ex.com", age := 30 }"#)
7047            .unwrap();
7048        engine
7049            .execute_powql(r#"insert User { name := "Bob", email := "bob@ex.com", age := 25 }"#)
7050            .unwrap();
7051        engine
7052            .execute_powql(
7053                r#"insert User { name := "Charlie", email := "charlie@ex.com", age := 35 }"#,
7054            )
7055            .unwrap();
7056        engine
7057    }
7058
7059    #[test]
7060    fn test_scan_all() {
7061        let mut engine = test_engine();
7062        let result = engine.execute_powql("User").unwrap();
7063        match result {
7064            QueryResult::Rows { rows, .. } => assert_eq!(rows.len(), 3),
7065            _ => panic!("expected rows"),
7066        }
7067    }
7068
7069    #[test]
7070    fn test_filter() {
7071        let mut engine = test_engine();
7072        let result = engine.execute_powql("User filter .age > 28").unwrap();
7073        match result {
7074            QueryResult::Rows { rows, .. } => {
7075                assert_eq!(rows.len(), 2); // Alice (30) and Charlie (35)
7076            }
7077            _ => panic!("expected rows"),
7078        }
7079    }
7080
7081    #[test]
7082    fn test_projection() {
7083        let mut engine = test_engine();
7084        let result = engine.execute_powql("User { name }").unwrap();
7085        match result {
7086            QueryResult::Rows { columns, rows } => {
7087                assert_eq!(columns, vec!["name"]);
7088                assert_eq!(rows.len(), 3);
7089            }
7090            _ => panic!("expected rows"),
7091        }
7092    }
7093
7094    #[test]
7095    fn test_insert_and_count() {
7096        let mut engine = test_engine();
7097        let result = engine.execute_powql("count(User)").unwrap();
7098        match result {
7099            QueryResult::Scalar(Value::Int(n)) => assert_eq!(n, 3),
7100            _ => panic!("expected scalar int"),
7101        }
7102    }
7103
7104    #[test]
7105    fn test_update() {
7106        let mut engine = test_engine();
7107        engine
7108            .execute_powql(r#"User filter .name = "Alice" update { age := 31 }"#)
7109            .unwrap();
7110        let result = engine
7111            .execute_powql(r#"User filter .name = "Alice" { name, age }"#)
7112            .unwrap();
7113        match result {
7114            QueryResult::Rows { rows, .. } => {
7115                assert_eq!(rows[0][1], Value::Int(31));
7116            }
7117            _ => panic!("expected rows"),
7118        }
7119    }
7120
7121    #[test]
7122    fn test_delete() {
7123        let mut engine = test_engine();
7124        engine
7125            .execute_powql(r#"User filter .name = "Bob" delete"#)
7126            .unwrap();
7127        let result = engine.execute_powql("count(User)").unwrap();
7128        match result {
7129            QueryResult::Scalar(Value::Int(n)) => assert_eq!(n, 2),
7130            _ => panic!("expected scalar int"),
7131        }
7132    }
7133
7134    #[test]
7135    fn test_order_limit() {
7136        let mut engine = test_engine();
7137        let result = engine
7138            .execute_powql("User order .age desc limit 2 { name, age }")
7139            .unwrap();
7140        match result {
7141            QueryResult::Rows { rows, .. } => {
7142                assert_eq!(rows.len(), 2);
7143                assert_eq!(rows[0][0], Value::Str("Charlie".into())); // age 35
7144                assert_eq!(rows[1][0], Value::Str("Alice".into())); // age 30
7145            }
7146            _ => panic!("expected rows"),
7147        }
7148    }
7149
7150    /// `order` by a non-existent column must surface an error, not panic.
7151    /// Regression for executor.rs:998 / :2560 where the Sort node called
7152    /// `unwrap_or_else(|| panic!(...))` — a malformed ORDER BY would crash
7153    /// the server thread instead of returning an error to the client.
7154    #[test]
7155    fn test_order_by_missing_column_errors() {
7156        let mut engine = test_engine();
7157        let err = engine
7158            .execute_powql("User order .nonexistent desc")
7159            .expect_err("sort on missing column must error, not panic");
7160        assert!(
7161            err.contains("nonexistent"),
7162            "error should name the missing column, got: {err}"
7163        );
7164    }
7165
7166    // ─── LIMIT / OFFSET combined semantics ──────────────────────────────────
7167    //
7168    // SQL/PowQL semantics: offset skips M rows first, then limit takes N rows.
7169    // `limit 3 offset 1` on 5 rows must return rows 1..4 (three rows), not
7170    // `N - M` rows. These regression tests pin the plan-shape ordering that
7171    // previously had Offset wrapping Limit (so Limit capped at N rows and
7172    // Offset then skipped M of those, yielding N - M).
7173
7174    /// 5-row Product fixture with an `id` column we can order on.
7175    fn product_engine() -> Engine {
7176        let id = TEST_COUNTER.fetch_add(1, Ordering::SeqCst);
7177        let dir =
7178            std::env::temp_dir().join(format!("powdb_limit_offset_{}_{}", std::process::id(), id));
7179        let mut engine = Engine::new(&dir).unwrap();
7180        engine
7181            .execute_powql("type Product { required id: int, required name: str }")
7182            .unwrap();
7183        for i in 0..5i64 {
7184            let q = format!(r#"insert Product {{ id := {i}, name := "p{i}" }}"#);
7185            engine.execute_powql(&q).unwrap();
7186        }
7187        engine
7188    }
7189
7190    #[test]
7191    fn test_limit_offset_combined() {
7192        // 5 rows, `limit 3 offset 1` → exactly 3 rows, ids [1, 2, 3] when
7193        // ordered by id. We order by id to pin the row identity; without
7194        // an order by, insertion order is implementation-defined.
7195        let mut engine = product_engine();
7196        let result = engine
7197            .execute_powql("Product order .id limit 3 offset 1 { .id }")
7198            .unwrap();
7199        match result {
7200            QueryResult::Rows { rows, .. } => {
7201                assert_eq!(
7202                    rows.len(),
7203                    3,
7204                    "limit 3 offset 1 on 5 rows must return 3 rows"
7205                );
7206                assert_eq!(rows[0][0], Value::Int(1));
7207                assert_eq!(rows[1][0], Value::Int(2));
7208                assert_eq!(rows[2][0], Value::Int(3));
7209            }
7210            _ => panic!("expected rows"),
7211        }
7212
7213        // `limit 2 offset 1` → exactly 2 rows, ids [1, 2].
7214        let result = engine
7215            .execute_powql("Product order .id limit 2 offset 1 { .id }")
7216            .unwrap();
7217        match result {
7218            QueryResult::Rows { rows, .. } => {
7219                assert_eq!(
7220                    rows.len(),
7221                    2,
7222                    "limit 2 offset 1 on 5 rows must return 2 rows"
7223                );
7224                assert_eq!(rows[0][0], Value::Int(1));
7225                assert_eq!(rows[1][0], Value::Int(2));
7226            }
7227            _ => panic!("expected rows"),
7228        }
7229    }
7230
7231    #[test]
7232    fn test_limit_offset_combined_with_order() {
7233        // Same semantics but ordering on a string column. Names are p0..p4,
7234        // so sort order is identical to id order.
7235        let mut engine = product_engine();
7236        let result = engine
7237            .execute_powql("Product order .name limit 3 offset 1 { .name }")
7238            .unwrap();
7239        match result {
7240            QueryResult::Rows { rows, .. } => {
7241                assert_eq!(rows.len(), 3);
7242                assert_eq!(rows[0][0], Value::Str("p1".into()));
7243                assert_eq!(rows[1][0], Value::Str("p2".into()));
7244                assert_eq!(rows[2][0], Value::Str("p3".into()));
7245            }
7246            _ => panic!("expected rows"),
7247        }
7248    }
7249
7250    #[test]
7251    fn test_offset_then_limit_keyword_order() {
7252        // Parser accepts limit/offset in either order — verify the plan
7253        // semantics are identical regardless of keyword order.
7254        let mut engine = product_engine();
7255        let result = engine
7256            .execute_powql("Product order .id offset 1 limit 3 { .id }")
7257            .unwrap();
7258        match result {
7259            QueryResult::Rows { rows, .. } => {
7260                assert_eq!(rows.len(), 3);
7261                assert_eq!(rows[0][0], Value::Int(1));
7262                assert_eq!(rows[1][0], Value::Int(2));
7263                assert_eq!(rows[2][0], Value::Int(3));
7264            }
7265            _ => panic!("expected rows"),
7266        }
7267    }
7268
7269    // ─── Mission A fast-path tests ──────────────────────────────────────────
7270    //
7271    // Fixture: Mission A workload schema — the same User shape used by
7272    // crates/compare. Deterministic generator so expected values are
7273    // computable directly in the test without reimplementing the interpreter.
7274
7275    /// Build a Mission A User table with `n` rows and an index on id.
7276    /// Row i (0-indexed, id = i):
7277    ///   id        = i
7278    ///   name      = format!("user_{i}")
7279    ///   age       = 18 + (i % 60)
7280    ///   status    = ["active","inactive","pending"][i % 3]
7281    ///   email     = format!("user_{i}@example.com")
7282    ///   created_at= 1_700_000_000 + i
7283    fn mission_a_engine(n: i64) -> Engine {
7284        let id = TEST_COUNTER.fetch_add(1, Ordering::SeqCst);
7285        let dir =
7286            std::env::temp_dir().join(format!("powdb_mission_a_{}_{}", std::process::id(), id));
7287        let mut engine = Engine::new(&dir).unwrap();
7288        engine
7289            .execute_powql(
7290                "type User { required id: int, required name: str, required age: int, \
7291             required status: str, required email: str, required created_at: int }",
7292            )
7293            .unwrap();
7294        engine.catalog_mut().create_index("User", "id").unwrap();
7295        let statuses = ["active", "inactive", "pending"];
7296        for i in 0..n {
7297            let age = 18 + (i % 60);
7298            let status = statuses[(i as usize) % 3];
7299            let created_at = 1_700_000_000_i64 + i;
7300            let q = format!(
7301                r#"insert User {{ id := {i}, name := "user_{i}", age := {age}, status := "{status}", email := "user_{i}@example.com", created_at := {created_at} }}"#
7302            );
7303            engine.execute_powql(&q).unwrap();
7304        }
7305        engine
7306    }
7307
7308    #[test]
7309    fn test_fastpath_point_lookup_nonindexed() {
7310        // `.email = literal` has no index — must short-circuit via compiled
7311        // predicate on the first match.
7312        let mut engine = mission_a_engine(50);
7313        let result = engine
7314            .execute_powql(r#"User filter .email = "user_17@example.com""#)
7315            .unwrap();
7316        match result {
7317            QueryResult::Rows { rows, .. } => {
7318                assert_eq!(rows.len(), 1);
7319                // id column is position 0
7320                assert_eq!(rows[0][0], Value::Int(17));
7321            }
7322            _ => panic!("expected rows"),
7323        }
7324    }
7325
7326    #[test]
7327    fn test_fastpath_scan_filter_project_top100() {
7328        // Project(Limit(Filter(SeqScan))) — stream, stop at 100.
7329        let mut engine = mission_a_engine(1000);
7330        let result = engine
7331            .execute_powql("User filter .age > 30 limit 100 { .id, .name }")
7332            .unwrap();
7333        match result {
7334            QueryResult::Rows { columns, rows } => {
7335                assert_eq!(columns, vec!["id", "name"]);
7336                assert_eq!(rows.len(), 100);
7337                // All rows must have age > 30 (age = 18 + (id % 60))
7338                // Verify via id: 18 + (id % 60) > 30  <=>  id % 60 > 12
7339                for row in &rows {
7340                    if let Value::Int(id) = row[0] {
7341                        assert!(18 + (id % 60) > 30, "id={id} has age={}", 18 + (id % 60));
7342                    } else {
7343                        panic!("expected int id");
7344                    }
7345                }
7346            }
7347            _ => panic!("expected rows"),
7348        }
7349    }
7350
7351    #[test]
7352    fn test_fastpath_scan_filter_sort_limit10_desc() {
7353        // Project(Limit(Sort(Filter(SeqScan)))) — bounded top-N heap desc.
7354        let mut engine = mission_a_engine(500);
7355        let result = engine
7356            .execute_powql(
7357                "User filter .age > 20 order .created_at desc limit 10 { .id, .created_at }",
7358            )
7359            .unwrap();
7360        match result {
7361            QueryResult::Rows { rows, .. } => {
7362                assert_eq!(rows.len(), 10);
7363                // Must be monotonically non-increasing in created_at.
7364                let keys: Vec<i64> = rows
7365                    .iter()
7366                    .map(|r| {
7367                        if let Value::Int(v) = r[1] {
7368                            v
7369                        } else {
7370                            panic!("expected int");
7371                        }
7372                    })
7373                    .collect();
7374                for w in keys.windows(2) {
7375                    assert!(w[0] >= w[1], "not desc sorted: {keys:?}");
7376                }
7377                // Highest created_at is id=499 (created_at=1_700_000_499),
7378                // age=18+(499%60)=37 which is > 20, so id=499 must be first.
7379                assert_eq!(rows[0][0], Value::Int(499));
7380            }
7381            _ => panic!("expected rows"),
7382        }
7383    }
7384
7385    #[test]
7386    fn test_fastpath_scan_filter_sort_limit10_asc() {
7387        let mut engine = mission_a_engine(500);
7388        let result = engine
7389            .execute_powql("User filter .age > 20 order .created_at limit 10 { .id, .created_at }")
7390            .unwrap();
7391        match result {
7392            QueryResult::Rows { rows, .. } => {
7393                assert_eq!(rows.len(), 10);
7394                let keys: Vec<i64> = rows
7395                    .iter()
7396                    .map(|r| {
7397                        if let Value::Int(v) = r[1] {
7398                            v
7399                        } else {
7400                            panic!("expected int");
7401                        }
7402                    })
7403                    .collect();
7404                for w in keys.windows(2) {
7405                    assert!(w[0] <= w[1], "not asc sorted: {keys:?}");
7406                }
7407            }
7408            _ => panic!("expected rows"),
7409        }
7410    }
7411
7412    #[test]
7413    fn test_fastpath_agg_sum() {
7414        // sum over all rows of the age column. Deterministic expected value.
7415        let n: i64 = 300;
7416        let mut engine = mission_a_engine(n);
7417        let result = engine.execute_powql("sum(User { .age })").unwrap();
7418        let expected: i64 = (0..n).map(|i| 18 + (i % 60)).sum();
7419        match result {
7420            QueryResult::Scalar(Value::Int(v)) => assert_eq!(v, expected),
7421            other => panic!("expected Int, got {other:?}"),
7422        }
7423    }
7424
7425    #[test]
7426    fn test_fastpath_agg_sum_with_filter() {
7427        let n: i64 = 300;
7428        let mut engine = mission_a_engine(n);
7429        let result = engine
7430            .execute_powql("sum(User filter .age > 30 { .age })")
7431            .unwrap();
7432        let expected: i64 = (0..n).map(|i| 18 + (i % 60)).filter(|a| *a > 30).sum();
7433        match result {
7434            QueryResult::Scalar(Value::Int(v)) => assert_eq!(v, expected),
7435            other => panic!("expected Int, got {other:?}"),
7436        }
7437    }
7438
7439    #[test]
7440    fn test_fastpath_agg_avg() {
7441        let n: i64 = 300;
7442        let mut engine = mission_a_engine(n);
7443        let result = engine.execute_powql("avg(User { .age })").unwrap();
7444        let total: f64 = (0..n).map(|i| (18 + (i % 60)) as f64).sum();
7445        let expected = total / n as f64;
7446        match result {
7447            QueryResult::Scalar(Value::Float(v)) => {
7448                assert!((v - expected).abs() < 1e-9, "expected {expected}, got {v}");
7449            }
7450            other => panic!("expected Float, got {other:?}"),
7451        }
7452    }
7453
7454    #[test]
7455    fn test_fastpath_agg_min_max() {
7456        let n: i64 = 300;
7457        let mut engine = mission_a_engine(n);
7458        // age = 18 + (i % 60), so min=18 and max=77 (18+59)
7459        let result_min = engine.execute_powql("min(User { .age })").unwrap();
7460        match result_min {
7461            QueryResult::Scalar(Value::Int(v)) => assert_eq!(v, 18),
7462            other => panic!("expected Int, got {other:?}"),
7463        }
7464        let result_max = engine.execute_powql("max(User { .age })").unwrap();
7465        match result_max {
7466            QueryResult::Scalar(Value::Int(v)) => assert_eq!(v, 77),
7467            other => panic!("expected Int, got {other:?}"),
7468        }
7469    }
7470
7471    #[test]
7472    fn test_fastpath_multi_col_and_filter() {
7473        // AND of int > and string = — both must be compiled into one closure.
7474        let n: i64 = 300;
7475        let mut engine = mission_a_engine(n);
7476        let result = engine
7477            .execute_powql(r#"count(User filter .age > 30 and .status = "active")"#)
7478            .unwrap();
7479        // Expected count via the same deterministic generator.
7480        let statuses = ["active", "inactive", "pending"];
7481        let expected = (0..n)
7482            .filter(|i| {
7483                let age = 18 + (i % 60);
7484                let status = statuses[(*i as usize) % 3];
7485                age > 30 && status == "active"
7486            })
7487            .count() as i64;
7488        match result {
7489            QueryResult::Scalar(Value::Int(v)) => assert_eq!(v, expected),
7490            other => panic!("expected Int, got {other:?}"),
7491        }
7492    }
7493
7494    #[test]
7495    fn test_fastpath_update_by_pk() {
7496        // Update(IndexScan) — single-row mutation via B-tree lookup.
7497        let mut engine = mission_a_engine(50);
7498        let result = engine
7499            .execute_powql("User filter .id = 25 update { age := 99 }")
7500            .unwrap();
7501        match result {
7502            QueryResult::Modified(n) => assert_eq!(n, 1),
7503            _ => panic!("expected Modified"),
7504        }
7505        // Verify the row has the new age.
7506        let lookup = engine
7507            .execute_powql("User filter .id = 25 { .age }")
7508            .unwrap();
7509        match lookup {
7510            QueryResult::Rows { rows, .. } => {
7511                assert_eq!(rows.len(), 1);
7512                assert_eq!(rows[0][0], Value::Int(99));
7513            }
7514            _ => panic!("expected rows"),
7515        }
7516        // Verify no neighbouring rows were touched.
7517        let neighbour = engine
7518            .execute_powql("User filter .id = 24 { .age }")
7519            .unwrap();
7520        if let QueryResult::Rows { rows, .. } = neighbour {
7521            assert_eq!(rows[0][0], Value::Int(42));
7522        }
7523    }
7524
7525    #[test]
7526    fn test_fastpath_update_by_filter_single_pass() {
7527        // Regression test for the O(N*M) bug: update by a range filter must
7528        // not take quadratic time. We can't directly assert timing, but we
7529        // can assert correctness and that the call completes for a
7530        // reasonably-sized table (the old path at N=2000 was ~40M row-eq
7531        // comparisons; the new path is O(N)).
7532        let n: i64 = 2000;
7533        let mut engine = mission_a_engine(n);
7534        let result = engine
7535            .execute_powql("User filter .age > 50 update { age := 5 }")
7536            .unwrap();
7537        let expected = (0..n).filter(|i| 18 + (i % 60) > 50).count() as u64;
7538        match result {
7539            QueryResult::Modified(nn) => assert_eq!(nn, expected),
7540            _ => panic!("expected Modified"),
7541        }
7542        // Every row that matched the filter now has age=5. We verify both
7543        // directions:
7544        //   (a) no rows remain with age > 50 (the filter predicate)
7545        //   (b) count(age = 5) equals the number of rows we updated
7546        // Note: the original generator never produces age=5, so count(age=5)
7547        // is exactly the number of updated rows.
7548        let check_zero = engine
7549            .execute_powql(r#"count(User filter .age > 50)"#)
7550            .unwrap();
7551        match check_zero {
7552            QueryResult::Scalar(Value::Int(v)) => assert_eq!(v, 0, "some rows still have age > 50"),
7553            _ => panic!("expected Int"),
7554        }
7555        let check_five = engine
7556            .execute_powql(r#"count(User filter .age = 5)"#)
7557            .unwrap();
7558        match check_five {
7559            QueryResult::Scalar(Value::Int(v)) => assert_eq!(v as u64, expected),
7560            _ => panic!("expected Int"),
7561        }
7562        // Total row count unchanged.
7563        let total = engine.execute_powql("count(User)").unwrap();
7564        match total {
7565            QueryResult::Scalar(Value::Int(v)) => assert_eq!(v, n),
7566            _ => panic!("expected Int"),
7567        }
7568    }
7569
7570    #[test]
7571    fn test_fastpath_delete_by_filter_single_pass() {
7572        let n: i64 = 2000;
7573        let mut engine = mission_a_engine(n);
7574        let to_delete = (0..n).filter(|i| 18 + (i % 60) > 60).count() as u64;
7575        let result = engine
7576            .execute_powql("User filter .age > 60 delete")
7577            .unwrap();
7578        match result {
7579            QueryResult::Modified(nn) => assert_eq!(nn, to_delete),
7580            _ => panic!("expected Modified"),
7581        }
7582        let count = engine.execute_powql("count(User)").unwrap();
7583        match count {
7584            QueryResult::Scalar(Value::Int(v)) => assert_eq!(v as u64, n as u64 - to_delete),
7585            _ => panic!("expected Int"),
7586        }
7587    }
7588
7589    #[test]
7590    fn test_fastpath_delete_by_pk() {
7591        let mut engine = mission_a_engine(30);
7592        let result = engine.execute_powql("User filter .id = 7 delete").unwrap();
7593        match result {
7594            QueryResult::Modified(n) => assert_eq!(n, 1),
7595            _ => panic!("expected Modified"),
7596        }
7597        // The deleted row must be gone.
7598        let lookup = engine.execute_powql("User filter .id = 7").unwrap();
7599        match lookup {
7600            QueryResult::Rows { rows, .. } => assert_eq!(rows.len(), 0),
7601            _ => panic!("expected rows"),
7602        }
7603        // Neighbours still present.
7604        let other = engine.execute_powql("User filter .id = 8 { .id }").unwrap();
7605        match other {
7606            QueryResult::Rows { rows, .. } => {
7607                assert_eq!(rows.len(), 1);
7608                assert_eq!(rows[0][0], Value::Int(8));
7609            }
7610            _ => panic!("expected rows"),
7611        }
7612    }
7613
7614    #[test]
7615    fn test_fastpath_update_by_filter_matches_generic() {
7616        // Cross-check: running the fast-path update and counting the
7617        // modified rows must agree with counting matching rows via a
7618        // separate query. This catches off-by-one bugs in rid collection.
7619        let n: i64 = 500;
7620        let mut engine = mission_a_engine(n);
7621        let count_before = engine
7622            .execute_powql(r#"count(User filter .status = "active")"#)
7623            .unwrap();
7624        let expected_count = match count_before {
7625            QueryResult::Scalar(Value::Int(v)) => v as u64,
7626            _ => panic!("expected Int"),
7627        };
7628
7629        let upd = engine
7630            .execute_powql(r#"User filter .status = "active" update { age := 42 }"#)
7631            .unwrap();
7632        match upd {
7633            QueryResult::Modified(n) => assert_eq!(n, expected_count),
7634            _ => panic!("expected Modified"),
7635        }
7636
7637        // All "active" rows now have age = 42.
7638        let count_after = engine
7639            .execute_powql(r#"count(User filter .age = 42)"#)
7640            .unwrap();
7641        match count_after {
7642            QueryResult::Scalar(Value::Int(v)) => {
7643                // Some non-active rows may also happen to have age = 42 from
7644                // the original schedule (age = 18 + (i % 60) == 42 when
7645                // i % 60 == 24). So we assert >= expected_count.
7646                assert!(v as u64 >= expected_count);
7647            }
7648            _ => panic!("expected Int"),
7649        }
7650    }
7651
7652    // ── Mission C Phase 5: prepared statements ────────────────────
7653
7654    #[test]
7655    fn test_prepared_insert_reuses_template() {
7656        let mut engine = test_engine();
7657        let prep = engine
7658            .prepare(r#"insert User { name := "seed", email := "seed@ex.com", age := 0 }"#)
7659            .expect("prepare");
7660        // The template has 3 literal slots: name, email, age.
7661        assert_eq!(prep.param_count, 3);
7662
7663        for i in 0..5 {
7664            engine
7665                .execute_prepared(
7666                    &prep,
7667                    &[
7668                        Literal::String(format!("user{i}")),
7669                        Literal::String(format!("u{i}@ex.com")),
7670                        Literal::Int(20 + i as i64),
7671                    ],
7672                )
7673                .expect("execute_prepared");
7674        }
7675
7676        // 3 seeded + 5 prepared inserts = 8 rows.
7677        let count = engine.execute_powql("count(User)").unwrap();
7678        match count {
7679            QueryResult::Scalar(Value::Int(n)) => assert_eq!(n, 8),
7680            _ => panic!("expected scalar"),
7681        }
7682    }
7683
7684    #[test]
7685    fn test_prepared_update_by_pk() {
7686        let mut engine = test_engine();
7687        let prep = engine
7688            .prepare(r#"User filter .name = "seed" update { age := 0 }"#)
7689            .expect("prepare");
7690        // Two slots: filter literal "seed" + assignment literal 0.
7691        assert_eq!(prep.param_count, 2);
7692
7693        engine
7694            .execute_prepared(&prep, &[Literal::String("Alice".into()), Literal::Int(99)])
7695            .expect("execute_prepared");
7696
7697        let result = engine
7698            .execute_powql(r#"User filter .name = "Alice" { age }"#)
7699            .unwrap();
7700        match result {
7701            QueryResult::Rows { rows, .. } => {
7702                assert_eq!(rows[0][0], Value::Int(99));
7703            }
7704            _ => panic!("expected rows"),
7705        }
7706    }
7707
7708    #[test]
7709    fn test_prepared_wrong_arity_errors() {
7710        let mut engine = test_engine();
7711        let prep = engine
7712            .prepare(r#"User filter .age > 0 { name }"#)
7713            .expect("prepare");
7714        assert_eq!(prep.param_count, 1);
7715        let err = engine.execute_prepared(&prep, &[]).unwrap_err();
7716        assert!(err.contains("expects 1 literal"));
7717    }
7718
7719    // ─── Mission E1.2 join executor tests ───────────────────────────────────
7720    //
    // Fixture: two-table User + Order schema. User has 3 rows; Order has 4
    // rows: three referencing users 1 and 2, plus one orphan (user_id 99) so
    // we can probe RIGHT OUTER semantics. Charlie (user 3) has no orders,
    // which is what exercises LEFT OUTER padding.
7724
7725    fn join_engine() -> Engine {
7726        let id = TEST_COUNTER.fetch_add(1, Ordering::SeqCst);
7727        let dir = std::env::temp_dir().join(format!("powdb_join_{}_{}", std::process::id(), id));
7728        let mut engine = Engine::new(&dir).unwrap();
7729        engine
7730            .execute_powql("type User { required id: int, required name: str }")
7731            .unwrap();
7732        engine
7733            .execute_powql(
7734                "type Order { required id: int, required user_id: int, required total: int }",
7735            )
7736            .unwrap();
7737        engine
7738            .execute_powql(r#"insert User { id := 1, name := "Alice" }"#)
7739            .unwrap();
7740        engine
7741            .execute_powql(r#"insert User { id := 2, name := "Bob" }"#)
7742            .unwrap();
7743        engine
7744            .execute_powql(r#"insert User { id := 3, name := "Charlie" }"#)
7745            .unwrap();
7746        engine
7747            .execute_powql(r#"insert Order { id := 10, user_id := 1, total := 100 }"#)
7748            .unwrap();
7749        engine
7750            .execute_powql(r#"insert Order { id := 11, user_id := 1, total := 200 }"#)
7751            .unwrap();
7752        engine
7753            .execute_powql(r#"insert Order { id := 12, user_id := 2, total := 50  }"#)
7754            .unwrap();
7755        engine
7756            .execute_powql(r#"insert Order { id := 13, user_id := 99, total := 999 }"#)
7757            .unwrap();
7758        engine
7759    }
7760
7761    #[test]
7762    fn test_inner_join_matches_rows() {
7763        let mut engine = join_engine();
7764        let result = engine
7765            .execute_powql("User as u join Order as o on u.id = o.user_id")
7766            .unwrap();
7767        match result {
7768            QueryResult::Rows { columns, rows } => {
7769                // 3 matches: Alice has 2 orders, Bob has 1. Charlie + orphan
7770                // are dropped under INNER semantics.
7771                assert_eq!(rows.len(), 3);
7772                // Columns are concatenated alias.field for both sides.
7773                assert!(columns.contains(&"u.id".to_string()));
7774                assert!(columns.contains(&"u.name".to_string()));
7775                assert!(columns.contains(&"o.id".to_string()));
7776                assert!(columns.contains(&"o.user_id".to_string()));
7777                assert!(columns.contains(&"o.total".to_string()));
7778            }
7779            _ => panic!("expected rows"),
7780        }
7781    }
7782
7783    #[test]
7784    fn test_inner_join_with_qualified_projection_and_filter() {
7785        let mut engine = join_engine();
7786        let result = engine
7787            .execute_powql(
7788                "User as u join Order as o on u.id = o.user_id \
7789             filter o.total > 75 { u.name, o.total }",
7790            )
7791            .unwrap();
7792        match result {
7793            QueryResult::Rows { columns, rows } => {
7794                assert_eq!(columns, vec!["u.name", "o.total"]);
7795                // Alice/100, Alice/200 (Bob's 50 filtered out).
7796                assert_eq!(rows.len(), 2);
7797                let names: Vec<_> = rows.iter().map(|r| r[0].clone()).collect();
7798                assert!(names
7799                    .iter()
7800                    .all(|v| matches!(v, Value::Str(s) if s == "Alice")));
7801            }
7802            _ => panic!("expected rows"),
7803        }
7804    }
7805
7806    #[test]
7807    fn test_join_projection_with_aliased_right_table_column() {
7808        // Regression: the TS client reported right-table projections being
7809        // silently dropped. Confirm that `{ u.name, tot: o.total }` emits
7810        // both columns (the right-table one under its explicit alias).
7811        let mut engine = join_engine();
7812        let result = engine
7813            .execute_powql("User as u join Order as o on u.id = o.user_id { u.name, tot: o.total }")
7814            .unwrap();
7815        match result {
7816            QueryResult::Rows { columns, rows } => {
7817                assert_eq!(columns, vec!["u.name", "tot"]);
7818                assert_eq!(rows.len(), 3);
7819                // Every row must have a populated `tot` value (not Empty).
7820                for row in &rows {
7821                    assert!(
7822                        matches!(row[1], Value::Int(_)),
7823                        "tot should be Int, got {:?}",
7824                        row[1]
7825                    );
7826                }
7827            }
7828            _ => panic!("expected rows"),
7829        }
7830    }
7831
7832    #[test]
7833    fn test_match_keyword_rejected_as_invalid_join() {
7834        // `match` is not a join keyword in PowQL — only `join`, `inner join`,
7835        // `left join`, `right join`, and `cross join` are recognised. With
7836        // the parser's EOF check in place, writing `match` produces a clean
7837        // error instead of silently dropping the rest of the query.
7838        let mut engine = join_engine();
7839        let err = engine
7840            .execute_powql("User match Order on u.id = o.user_id { u.name }")
7841            .unwrap_err();
7842        assert!(
7843            err.to_string().to_lowercase().contains("match")
7844                || err.to_string().to_lowercase().contains("trailing")
7845                || err.to_string().to_lowercase().contains("unexpected"),
7846            "expected parse error mentioning trailing/unexpected token, got: {err}"
7847        );
7848    }
7849
7850    #[test]
7851    fn test_left_outer_join_emits_orphan_left_rows() {
7852        let mut engine = join_engine();
7853        let result = engine
7854            .execute_powql("User as u left join Order as o on u.id = o.user_id")
7855            .unwrap();
7856        match result {
7857            QueryResult::Rows { rows, columns } => {
7858                // Alice(2) + Bob(1) + Charlie(padding) = 4 rows.
7859                assert_eq!(rows.len(), 4);
7860                // Find Charlie's row and verify the right-side columns are Empty.
7861                let u_name_idx = columns.iter().position(|c| c == "u.name").unwrap();
7862                let o_total_idx = columns.iter().position(|c| c == "o.total").unwrap();
7863                let charlie = rows
7864                    .iter()
7865                    .find(|r| matches!(&r[u_name_idx], Value::Str(s) if s == "Charlie"))
7866                    .expect("Charlie row present");
7867                assert_eq!(charlie[o_total_idx], Value::Empty);
7868            }
7869            _ => panic!("expected rows"),
7870        }
7871    }
7872
7873    #[test]
7874    fn test_right_outer_join_emits_orphan_right_rows() {
7875        let mut engine = join_engine();
7876        // The orphan order (user_id = 99) has no matching User; RIGHT OUTER
7877        // should still emit it with the left-side (User) columns as Empty.
7878        let result = engine
7879            .execute_powql("User as u right join Order as o on u.id = o.user_id")
7880            .unwrap();
7881        match result {
7882            QueryResult::Rows { rows, columns } => {
7883                // All 4 orders appear (3 matched + 1 orphan).
7884                assert_eq!(rows.len(), 4);
7885                let u_name_idx = columns.iter().position(|c| c == "u.name").unwrap();
7886                let o_total_idx = columns.iter().position(|c| c == "o.total").unwrap();
7887                let orphan = rows
7888                    .iter()
7889                    .find(|r| r[o_total_idx] == Value::Int(999))
7890                    .expect("orphan order row present");
7891                assert_eq!(orphan[u_name_idx], Value::Empty);
7892            }
7893            _ => panic!("expected rows"),
7894        }
7895    }
7896
7897    #[test]
7898    fn test_cross_join_emits_full_product() {
7899        let mut engine = join_engine();
7900        let result = engine
7901            .execute_powql("User as u cross join Order as o")
7902            .unwrap();
7903        match result {
7904            QueryResult::Rows { rows, .. } => {
7905                assert_eq!(rows.len(), 3 * 4);
7906            }
7907            _ => panic!("expected rows"),
7908        }
7909    }
7910
7911    #[test]
7912    fn test_hash_join_handles_swapped_predicate_orientation() {
7913        // `on o.user_id = u.id` should resolve the same as `u.id = o.user_id`
7914        // — exercises the swapped-orientation branch in
7915        // `try_extract_equi_join_keys`.
7916        let mut engine = join_engine();
7917        let result = engine
7918            .execute_powql("User as u join Order as o on o.user_id = u.id { u.name, o.total }")
7919            .unwrap();
7920        match result {
7921            QueryResult::Rows { rows, columns } => {
7922                assert_eq!(columns, vec!["u.name", "o.total"]);
7923                assert_eq!(rows.len(), 3);
7924            }
7925            _ => panic!("expected rows"),
7926        }
7927    }
7928
7929    #[test]
7930    fn test_non_equi_join_falls_back_to_nested_loop() {
7931        // `u.id < o.user_id` isn't an equi-join, so the executor must
7932        // drop into the nested-loop path and still return correct rows.
7933        let mut engine = join_engine();
7934        let result = engine
7935            .execute_powql("User as u join Order as o on u.id < o.user_id")
7936            .unwrap();
7937        match result {
7938            QueryResult::Rows { rows, columns } => {
7939                // Pairs where u.id < o.user_id:
7940                //   User 1 < orders 2,99 = 2 rows (o.user_id=2 twice? no, only one order for user 2)
7941                //   Actually: orders have user_ids [1,1,2,99].
7942                //   User 1 (id=1): 1<1 no, 1<1 no, 1<2 yes, 1<99 yes → 2
7943                //   User 2 (id=2): 2<1 no, 2<1 no, 2<2 no, 2<99 yes → 1
7944                //   User 3 (id=3): 3<1 no, 3<1 no, 3<2 no, 3<99 yes → 1
7945                // Total 4.
7946                assert_eq!(rows.len(), 4);
7947                let u_id_idx = columns.iter().position(|c| c == "u.id").unwrap();
7948                let o_uid_idx = columns.iter().position(|c| c == "o.user_id").unwrap();
7949                for row in &rows {
7950                    match (&row[u_id_idx], &row[o_uid_idx]) {
7951                        (Value::Int(u), Value::Int(o)) => assert!(u < o),
7952                        _ => panic!("expected int columns"),
7953                    }
7954                }
7955            }
7956            _ => panic!("expected rows"),
7957        }
7958    }
7959
7960    #[test]
7961    fn test_hash_join_with_string_key() {
7962        // Exercise the Value::Str hash path — plus verifies Hash impl for
7963        // Value works end to end via FxHashMap.
7964        let id = TEST_COUNTER.fetch_add(1, Ordering::SeqCst);
7965        let dir = std::env::temp_dir().join(format!("powdb_strjoin_{}_{}", std::process::id(), id));
7966        let mut engine = Engine::new(&dir).unwrap();
7967        engine
7968            .execute_powql("type A { required code: str, required label: str }")
7969            .unwrap();
7970        engine
7971            .execute_powql("type B { required code: str, required score: int }")
7972            .unwrap();
7973        engine
7974            .execute_powql(r#"insert A { code := "x", label := "X-label" }"#)
7975            .unwrap();
7976        engine
7977            .execute_powql(r#"insert A { code := "y", label := "Y-label" }"#)
7978            .unwrap();
7979        engine
7980            .execute_powql(r#"insert B { code := "x", score := 100 }"#)
7981            .unwrap();
7982        engine
7983            .execute_powql(r#"insert B { code := "y", score := 200 }"#)
7984            .unwrap();
7985        engine
7986            .execute_powql(r#"insert B { code := "z", score := 300 }"#)
7987            .unwrap();
7988
7989        let result = engine
7990            .execute_powql("A as a join B as b on a.code = b.code { a.label, b.score }")
7991            .unwrap();
7992        match result {
7993            QueryResult::Rows { rows, .. } => {
7994                // x→100, y→200. z has no matching A.
7995                assert_eq!(rows.len(), 2);
7996            }
7997            _ => panic!("expected rows"),
7998        }
7999    }
8000
8001    #[test]
8002    fn test_multi_join_chain() {
8003        // Third source — verify left-deep chains compose correctly.
8004        let mut engine = join_engine();
8005        engine
8006            .execute_powql("type Product { required id: int, required name: str }")
8007            .unwrap();
8008        engine
8009            .execute_powql(r#"insert Product { id := 100, name := "Widget" }"#)
8010            .unwrap();
8011        engine
8012            .execute_powql(r#"insert Product { id := 200, name := "Gadget" }"#)
8013            .unwrap();
8014        // Re-create Orders with a product_id column wouldn't work without
8015        // table alter; instead we pick a test that exercises the shape only.
8016        let result = engine
8017            .execute_powql(
8018                "User as u join Order as o on u.id = o.user_id \
8019             cross join Product as p",
8020            )
8021            .unwrap();
8022        match result {
8023            QueryResult::Rows { rows, columns } => {
8024                // 3 inner matches × 2 products = 6 rows.
8025                assert_eq!(rows.len(), 6);
8026                assert!(columns.contains(&"u.name".to_string()));
8027                assert!(columns.contains(&"o.total".to_string()));
8028                assert!(columns.contains(&"p.name".to_string()));
8029            }
8030            _ => panic!("expected rows"),
8031        }
8032    }
8033
8034    // ---- Mission E2a: DISTINCT + IN-list + BETWEEN + LIKE -----------------
8035
8036    #[test]
8037    fn test_distinct_deduplicates_rows() {
8038        let mut engine = test_engine();
8039        // Insert a second Alice to create a duplicate name.
8040        engine
8041            .execute_powql(
8042                r#"insert User { name := "Alice", email := "alice2@ex.com", age := 25 }"#,
8043            )
8044            .unwrap();
8045        let result = engine.execute_powql("User distinct { .name }").unwrap();
8046        match result {
8047            QueryResult::Rows { rows, .. } => {
8048                let names: Vec<&Value> = rows.iter().map(|r| &r[0]).collect();
8049                // 4 rows in table (Alice×2, Bob, Charlie) but 3 distinct names.
8050                assert_eq!(names.len(), 3);
8051                let alice_count = names
8052                    .iter()
8053                    .filter(|v| matches!(v, Value::Str(s) if s == "Alice"))
8054                    .count();
8055                assert_eq!(alice_count, 1);
8056                assert!(names
8057                    .iter()
8058                    .any(|v| matches!(v, Value::Str(s) if s == "Bob")));
8059                assert!(names
8060                    .iter()
8061                    .any(|v| matches!(v, Value::Str(s) if s == "Charlie")));
8062            }
8063            _ => panic!("expected rows"),
8064        }
8065    }
8066
8067    #[test]
8068    fn test_in_list_filter() {
8069        let mut engine = test_engine();
8070        let result = engine
8071            .execute_powql(r#"User filter .name in ("Alice", "Bob") { .name }"#)
8072            .unwrap();
8073        match result {
8074            QueryResult::Rows { rows, .. } => {
8075                assert_eq!(rows.len(), 2);
8076            }
8077            _ => panic!("expected rows"),
8078        }
8079    }
8080
8081    #[test]
8082    fn test_not_in_list_filter() {
8083        let mut engine = test_engine();
8084        let result = engine
8085            .execute_powql(r#"User filter .name not in ("Alice") { .name }"#)
8086            .unwrap();
8087        match result {
8088            QueryResult::Rows { rows, .. } => {
8089                // Bob and Charlie survive.
8090                assert_eq!(rows.len(), 2);
8091            }
8092            _ => panic!("expected rows"),
8093        }
8094    }
8095
8096    #[test]
8097    fn test_between_filter() {
8098        let mut engine = test_engine();
8099        let result = engine
8100            .execute_powql("User filter .age between 25 and 30 { .name, .age }")
8101            .unwrap();
8102        match result {
8103            QueryResult::Rows { rows, .. } => {
8104                // Alice is 30 (inclusive), Bob is 25 (inclusive).
8105                assert_eq!(rows.len(), 2);
8106            }
8107            _ => panic!("expected rows"),
8108        }
8109    }
8110
8111    #[test]
8112    fn test_between_filter_float_column_int_literals() {
8113        // Regression for Value::Ord cross-type bug: BETWEEN on a Float column
8114        // with Int literals previously returned zero rows because Ord fell
8115        // through to TypeId discriminant comparison instead of promoting Int
8116        // to f64. Verifies the fix end-to-end through the query engine.
8117        let id = TEST_COUNTER.fetch_add(1, Ordering::SeqCst);
8118        let dir = std::env::temp_dir().join(format!(
8119            "powdb_exec_between_float_{}_{}",
8120            std::process::id(),
8121            id
8122        ));
8123        let mut engine = Engine::new(&dir).unwrap();
8124        engine
8125            .execute_powql("type Product { required name: str, required price: float }")
8126            .unwrap();
8127        engine
8128            .execute_powql(r#"insert Product { name := "Cable",   price := 29.0 }"#)
8129            .unwrap();
8130        engine
8131            .execute_powql(r#"insert Product { name := "Speaker", price := 175.5 }"#)
8132            .unwrap();
8133        engine
8134            .execute_powql(r#"insert Product { name := "Monitor", price := 450.0 }"#)
8135            .unwrap();
8136        engine
8137            .execute_powql(r#"insert Product { name := "Laptop",  price := 1299.0 }"#)
8138            .unwrap();
8139
8140        let result = engine
8141            .execute_powql("Product filter .price between 100 and 500 { .name, .price }")
8142            .unwrap();
8143        match result {
8144            QueryResult::Rows { rows, .. } => {
8145                assert_eq!(
8146                    rows.len(),
8147                    2,
8148                    "expected 2 rows in [100, 500] range, got {}: {:?}",
8149                    rows.len(),
8150                    rows
8151                );
8152                // Sorted by insert order: Speaker (175.5), Monitor (450.0).
8153                let names: Vec<&str> = rows
8154                    .iter()
8155                    .map(|r| match &r[0] {
8156                        Value::Str(s) => s.as_str(),
8157                        _ => panic!("expected string name"),
8158                    })
8159                    .collect();
8160                assert!(names.contains(&"Speaker"));
8161                assert!(names.contains(&"Monitor"));
8162            }
8163            _ => panic!("expected rows"),
8164        }
8165    }
8166
8167    #[test]
8168    fn test_not_between_filter() {
8169        let mut engine = test_engine();
8170        let result = engine
8171            .execute_powql("User filter .age not between 26 and 29 { .name }")
8172            .unwrap();
8173        match result {
8174            QueryResult::Rows { rows, .. } => {
8175                // Alice (30), Bob (25), Charlie (35) all outside [26,29].
8176                assert_eq!(rows.len(), 3);
8177            }
8178            _ => panic!("expected rows"),
8179        }
8180    }
8181
8182    #[test]
8183    fn test_like_prefix_match() {
8184        let mut engine = test_engine();
8185        let result = engine
8186            .execute_powql(r#"User filter .name like "Ali%" { .name }"#)
8187            .unwrap();
8188        match result {
8189            QueryResult::Rows { rows, .. } => {
8190                assert_eq!(rows.len(), 1);
8191                assert!(matches!(&rows[0][0], Value::Str(s) if s == "Alice"));
8192            }
8193            _ => panic!("expected rows"),
8194        }
8195    }
8196
8197    #[test]
8198    fn test_like_wildcard_underscore() {
8199        let mut engine = test_engine();
8200        let result = engine
8201            .execute_powql(r#"User filter .name like "_ob" { .name }"#)
8202            .unwrap();
8203        match result {
8204            QueryResult::Rows { rows, .. } => {
8205                assert_eq!(rows.len(), 1);
8206                assert!(matches!(&rows[0][0], Value::Str(s) if s == "Bob"));
8207            }
8208            _ => panic!("expected rows"),
8209        }
8210    }
8211
8212    #[test]
8213    fn test_not_like_filter() {
8214        let mut engine = test_engine();
8215        let result = engine
8216            .execute_powql(r#"User filter .name not like "A%" { .name }"#)
8217            .unwrap();
8218        match result {
8219            QueryResult::Rows { rows, .. } => {
8220                // Bob and Charlie survive (don't start with A).
8221                assert_eq!(rows.len(), 2);
8222            }
8223            _ => panic!("expected rows"),
8224        }
8225    }
8226
8227    #[test]
8228    fn test_in_list_with_integers() {
8229        let mut engine = test_engine();
8230        let result = engine
8231            .execute_powql("User filter .age in (25, 30) { .name }")
8232            .unwrap();
8233        match result {
8234            QueryResult::Rows { rows, .. } => {
8235                assert_eq!(rows.len(), 2);
8236            }
8237            _ => panic!("expected rows"),
8238        }
8239    }
8240
8241    #[test]
8242    fn test_like_full_match() {
8243        let mut engine = test_engine();
8244        // Exact match (no wildcards).
8245        let result = engine
8246            .execute_powql(r#"User filter .name like "Alice" { .name }"#)
8247            .unwrap();
8248        match result {
8249            QueryResult::Rows { rows, .. } => {
8250                assert_eq!(rows.len(), 1);
8251            }
8252            _ => panic!("expected rows"),
8253        }
8254    }
8255
8256    // ─── Mission E2b: GROUP BY + HAVING ────────────────────────────────────
8257
8258    #[test]
8259    fn test_group_by_count() {
8260        // All 3 users share the same "age bucket" when we group by a
8261        // derived column, but we can at least group by a column with
8262        // distinct values. test_engine has 3 distinct names.
8263        let mut engine = test_engine();
8264        let result = engine
8265            .execute_powql("User group .name { .name, n: count(.name) }")
8266            .unwrap();
8267        match result {
8268            QueryResult::Rows { columns, rows } => {
8269                assert_eq!(columns, vec!["name", "n"]);
8270                assert_eq!(rows.len(), 3); // 3 distinct names
8271                                           // Each group has 1 row.
8272                for row in &rows {
8273                    assert_eq!(row[1], Value::Int(1));
8274                }
8275            }
8276            _ => panic!("expected rows"),
8277        }
8278    }
8279
8280    #[test]
8281    fn test_group_by_sum_avg() {
8282        // Group all rows into one bucket by a constant column.
8283        // We'll use the mission_a_engine with a known shape.
8284        let mut engine = test_engine();
8285        // All 3 users: ages 30, 25, 35 → sum=90, avg=30.0
8286        let result = engine
8287            .execute_powql("User group .email { .email, total_age: sum(.age) }")
8288            .unwrap();
8289        match result {
8290            QueryResult::Rows { rows, .. } => {
8291                // Each email is unique → 3 groups, each with sum of one age.
8292                assert_eq!(rows.len(), 3);
8293            }
8294            _ => panic!("expected rows"),
8295        }
8296    }
8297
8298    #[test]
8299    fn test_group_by_with_filter() {
8300        let mut engine = test_engine();
8301        // Filter first, then group.
8302        let result = engine
8303            .execute_powql("User filter .age >= 30 group .name { .name, n: count(.name) }")
8304            .unwrap();
8305        match result {
8306            QueryResult::Rows { rows, .. } => {
8307                // Alice (30) and Charlie (35) survive filter.
8308                assert_eq!(rows.len(), 2);
8309            }
8310            _ => panic!("expected rows"),
8311        }
8312    }
8313
8314    #[test]
8315    fn test_group_by_having() {
8316        // Use mission_a_engine so we have multiple rows per group.
8317        let mut engine = mission_a_engine(30);
8318        // 30 rows: statuses cycle active/inactive/pending → 10 each.
8319        // Group by status, HAVING count > 5.
8320        let result = engine
8321            .execute_powql(
8322                "User group .status having count(.name) > 5 { .status, n: count(.name) }",
8323            )
8324            .unwrap();
8325        match result {
8326            QueryResult::Rows { columns, rows } => {
8327                assert_eq!(columns, vec!["status", "n"]);
8328                // All 3 groups have 10 rows each, all > 5.
8329                assert_eq!(rows.len(), 3);
8330                for row in &rows {
8331                    assert_eq!(row[1], Value::Int(10));
8332                }
8333            }
8334            _ => panic!("expected rows"),
8335        }
8336    }
8337
8338    #[test]
8339    fn test_group_by_having_filters_groups() {
8340        let mut engine = mission_a_engine(30);
8341        // HAVING count > 100 → no groups survive.
8342        let result = engine
8343            .execute_powql("User group .status having count(.name) > 100 { .status }")
8344            .unwrap();
8345        match result {
8346            QueryResult::Rows { rows, .. } => {
8347                assert_eq!(rows.len(), 0);
8348            }
8349            _ => panic!("expected rows"),
8350        }
8351    }
8352
8353    #[test]
8354    fn test_group_by_having_with_aliased_projection_agg() {
8355        // Regression: TS client found that when the projection duplicates
8356        // the aggregate used by HAVING (with an alias), HAVING silently
8357        // failed to filter. This asserts the dedup path produces correct
8358        // filtering.
8359        let mut engine = mission_a_engine(30);
8360        // 3 statuses, 10 rows each. HAVING >= 11 should exclude all.
8361        let result = engine
8362            .execute_powql(
8363                "User group .status having count(.name) >= 11 { .status, cnt: count(.name) }",
8364            )
8365            .unwrap();
8366        match result {
8367            QueryResult::Rows { rows, .. } => {
8368                assert_eq!(rows.len(), 0, "HAVING >= 11 should filter all groups");
8369            }
8370            _ => panic!("expected rows"),
8371        }
8372        // HAVING >= 10 should include all three.
8373        let result = engine
8374            .execute_powql(
8375                "User group .status having count(.name) >= 10 { .status, cnt: count(.name) }",
8376            )
8377            .unwrap();
8378        match result {
8379            QueryResult::Rows { rows, .. } => {
8380                assert_eq!(rows.len(), 3);
8381                for row in &rows {
8382                    assert_eq!(row[1], Value::Int(10));
8383                }
8384            }
8385            _ => panic!("expected rows"),
8386        }
8387    }
8388
8389    #[test]
8390    fn test_group_by_having_post_projection() {
8391        // Regression: HAVING placed after the projection (`{ ... } having cnt >= N`,
8392        // referencing projection aliases) was silently dropped. This reproduces
8393        // the exact form the TS client used.
8394        let id = TEST_COUNTER.fetch_add(1, Ordering::SeqCst);
8395        let dir =
8396            std::env::temp_dir().join(format!("powdb_having_post_{}_{}", std::process::id(), id));
8397        let mut engine = Engine::new(&dir).unwrap();
8398        engine
8399            .execute_powql("type Person { required name: str, required age: int, city: str }")
8400            .unwrap();
8401        for (name, age, city) in [
8402            ("Alice", 30, "NYC"),
8403            ("Bob", 24, "SF"),
8404            ("Carol", 41, "LA"),
8405            ("Dave", 28, "NYC"),
8406            ("Eve", 35, "Austin"),
8407        ] {
8408            engine
8409                .execute_powql(&format!(
8410                    r#"insert Person {{ name := "{name}", age := {age}, city := "{city}" }}"#
8411                ))
8412                .unwrap();
8413        }
8414        let result = engine
8415            .execute_powql("Person group .city { .city, cnt: count(.name) } having cnt >= 2")
8416            .unwrap();
8417        match result {
8418            QueryResult::Rows { rows, .. } => {
8419                assert_eq!(rows.len(), 1, "only NYC has >= 2 people, got: {rows:?}");
8420                assert_eq!(rows[0][0], Value::Str("NYC".into()));
8421                assert_eq!(rows[0][1], Value::Int(2));
8422            }
8423            _ => panic!("expected rows"),
8424        }
8425    }
8426
8427    #[test]
8428    fn test_having_without_group_by_errors() {
8429        let mut engine = test_engine();
8430        let err = engine.execute_powql("User { .name } having count(.name) > 1");
8431        assert!(
8432            err.is_err(),
8433            "HAVING without GROUP BY should be a parse error"
8434        );
8435    }
8436
8437    #[test]
8438    fn test_group_by_having_reproduces_ts_client_case() {
8439        // Exact reproduction of the TS client test that surfaced the bug:
8440        // 5 people across 4 cities, HAVING count >= 2 should keep only NYC.
8441        let id = TEST_COUNTER.fetch_add(1, Ordering::SeqCst);
8442        let dir =
8443            std::env::temp_dir().join(format!("powdb_having_ts_{}_{}", std::process::id(), id));
8444        let mut engine = Engine::new(&dir).unwrap();
8445        engine
8446            .execute_powql("type Person { required name: str, required age: int, city: str }")
8447            .unwrap();
8448        for (name, age, city) in [
8449            ("Alice", 30, "NYC"),
8450            ("Bob", 24, "SF"),
8451            ("Carol", 41, "LA"),
8452            ("Dave", 28, "NYC"),
8453            ("Eve", 35, "Austin"),
8454        ] {
8455            engine
8456                .execute_powql(&format!(
8457                    r#"insert Person {{ name := "{name}", age := {age}, city := "{city}" }}"#
8458                ))
8459                .unwrap();
8460        }
8461        let result = engine
8462            .execute_powql(
8463                "Person group .city having count(.name) >= 2 { .city, cnt: count(.name) }",
8464            )
8465            .unwrap();
8466        match result {
8467            QueryResult::Rows { rows, .. } => {
8468                assert_eq!(rows.len(), 1, "only NYC has >= 2 people, got: {rows:?}");
8469                assert_eq!(rows[0][0], Value::Str("NYC".into()));
8470                assert_eq!(rows[0][1], Value::Int(2));
8471            }
8472            _ => panic!("expected rows"),
8473        }
8474    }
8475
8476    #[test]
8477    fn test_group_by_having_filters_some_groups() {
8478        // Skewed distribution — some groups pass HAVING, some don't.
8479        let mut engine = test_engine();
8480        // test_engine has 3 rows, all distinct names. Add duplicates for Alice.
8481        engine
8482            .execute_powql(r#"insert User { name := "Alice", email := "a2@ex.com", age := 31 }"#)
8483            .unwrap();
8484        engine
8485            .execute_powql(r#"insert User { name := "Alice", email := "a3@ex.com", age := 32 }"#)
8486            .unwrap();
8487        // Now: Alice ×3, Bob ×1, Charlie ×1. HAVING count >= 2 → only Alice.
8488        let result = engine
8489            .execute_powql("User group .name having count(.name) >= 2 { .name, cnt: count(.name) }")
8490            .unwrap();
8491        match result {
8492            QueryResult::Rows { rows, .. } => {
8493                assert_eq!(rows.len(), 1);
8494                assert_eq!(rows[0][0], Value::Str("Alice".into()));
8495                assert_eq!(rows[0][1], Value::Int(3));
8496            }
8497            _ => panic!("expected rows"),
8498        }
8499    }
8500
8501    #[test]
8502    fn test_group_by_min_max() {
8503        let mut engine = mission_a_engine(30);
8504        // 30 rows, ages = 18 + (i % 60) for i in 0..30, so ages 18..47.
8505        // Group by status (3 groups of 10 each).
8506        // status=active: i=0,3,6,9,12,15,18,21,24,27 → ages 18,21,24,27,30,33,36,39,42,45
8507        // min=18, max=45
8508        let result = engine.execute_powql(
8509            r#"User filter .status = "active" group .status { .status, lo: min(.age), hi: max(.age) }"#,
8510        ).unwrap();
8511        match result {
8512            QueryResult::Rows { columns, rows } => {
8513                assert_eq!(columns, vec!["status", "lo", "hi"]);
8514                assert_eq!(rows.len(), 1);
8515                assert_eq!(rows[0][0], Value::Str("active".into()));
8516                assert_eq!(rows[0][1], Value::Int(18));
8517                assert_eq!(rows[0][2], Value::Int(45));
8518            }
8519            _ => panic!("expected rows"),
8520        }
8521    }
8522
8523    #[test]
8524    fn test_group_by_avg() {
8525        let mut engine = mission_a_engine(6);
8526        // 6 rows: i=0..5
8527        // active (i=0,3): ages 18,21 → avg=19.5
8528        // inactive (i=1,4): ages 19,22 → avg=20.5
8529        // pending (i=2,5): ages 20,23 → avg=21.5
8530        let result = engine
8531            .execute_powql(
8532                r#"User filter .status = "active" group .status { .status, a: avg(.age) }"#,
8533            )
8534            .unwrap();
8535        match result {
8536            QueryResult::Rows { rows, .. } => {
8537                assert_eq!(rows.len(), 1);
8538                match &rows[0][1] {
8539                    Value::Float(v) => assert!((v - 19.5).abs() < 0.001),
8540                    other => panic!("expected float, got {other:?}"),
8541                }
8542            }
8543            _ => panic!("expected rows"),
8544        }
8545    }
8546
8547    // ─── IS NULL / IS NOT NULL tests ─────────────────────────────────────
8548
8549    #[test]
8550    fn test_is_null_filter() {
8551        let mut engine = test_engine();
8552        engine
8553            .execute_powql(r#"insert User { name := "Diana", email := "diana@ex.com" }"#)
8554            .unwrap();
8555        let result = engine
8556            .execute_powql("User filter .age is null { .name }")
8557            .unwrap();
8558        match result {
8559            QueryResult::Rows { rows, .. } => {
8560                assert_eq!(rows.len(), 1);
8561                assert_eq!(rows[0][0], Value::Str("Diana".into()));
8562            }
8563            _ => panic!("expected rows"),
8564        }
8565    }
8566
8567    #[test]
8568    fn test_is_not_null_filter() {
8569        let mut engine = test_engine();
8570        engine
8571            .execute_powql(r#"insert User { name := "Diana", email := "diana@ex.com" }"#)
8572            .unwrap();
8573        let result = engine
8574            .execute_powql("User filter .age is not null { .name }")
8575            .unwrap();
8576        match result {
8577            QueryResult::Rows { rows, .. } => {
8578                assert_eq!(rows.len(), 3);
8579            }
8580            _ => panic!("expected rows"),
8581        }
8582    }
8583
8584    #[test]
8585    fn test_is_null_count() {
8586        let mut engine = test_engine();
8587        engine
8588            .execute_powql(r#"insert User { name := "Diana", email := "diana@ex.com" }"#)
8589            .unwrap();
8590        let result = engine
8591            .execute_powql("count(User filter .age is null)")
8592            .unwrap();
8593        match result {
8594            QueryResult::Scalar(Value::Int(n)) => assert_eq!(n, 1),
8595            _ => panic!("expected scalar int"),
8596        }
8597    }
8598
8599    #[test]
8600    fn test_is_null_combined_with_and() {
8601        let mut engine = test_engine();
8602        engine
8603            .execute_powql(r#"insert User { name := "Diana", email := "diana@ex.com" }"#)
8604            .unwrap();
8605        engine
8606            .execute_powql(r#"insert User { name := "Eve", email := "eve@ex.com" }"#)
8607            .unwrap();
8608        let result = engine
8609            .execute_powql(r#"User filter .age is null and .name = "Diana" { .name }"#)
8610            .unwrap();
8611        match result {
8612            QueryResult::Rows { rows, .. } => {
8613                assert_eq!(rows.len(), 1);
8614                assert_eq!(rows[0][0], Value::Str("Diana".into()));
8615            }
8616            _ => panic!("expected rows"),
8617        }
8618    }
8619
8620    #[test]
8621    fn test_eq_null_matches_is_null() {
8622        let mut engine = test_engine();
8623        engine
8624            .execute_powql(r#"insert User { name := "Diana", email := "diana@ex.com" }"#)
8625            .unwrap();
8626        let result = engine
8627            .execute_powql("User filter .age = null { .name }")
8628            .unwrap();
8629        match result {
8630            QueryResult::Rows { rows, .. } => {
8631                assert_eq!(rows.len(), 1);
8632                assert_eq!(rows[0][0], Value::Str("Diana".into()));
8633            }
8634            _ => panic!("expected rows"),
8635        }
8636    }
8637
8638    #[test]
8639    fn test_neq_null_matches_is_not_null() {
8640        let mut engine = test_engine();
8641        engine
8642            .execute_powql(r#"insert User { name := "Diana", email := "diana@ex.com" }"#)
8643            .unwrap();
8644        let result = engine
8645            .execute_powql("User filter .age != null { .name }")
8646            .unwrap();
8647        match result {
8648            QueryResult::Rows { rows, .. } => {
8649                assert_eq!(rows.len(), 3);
8650            }
8651            _ => panic!("expected rows"),
8652        }
8653    }
8654
8655    // ─── String function tests ─────────────────────────────────────────────
8656
8657    #[test]
8658    fn test_upper_in_filter() {
8659        let mut engine = test_engine();
8660        let result = engine
8661            .execute_powql(r#"User filter upper(.name) = "ALICE""#)
8662            .unwrap();
8663        match result {
8664            QueryResult::Rows { rows, .. } => {
8665                assert_eq!(rows.len(), 1);
8666                assert_eq!(rows[0][0], Value::Str("Alice".into()));
8667            }
8668            _ => panic!("expected rows"),
8669        }
8670    }
8671
8672    #[test]
8673    fn test_lower_in_projection() {
8674        let mut engine = test_engine();
8675        let result = engine.execute_powql("User { low: lower(.email) }").unwrap();
8676        match result {
8677            QueryResult::Rows { columns, rows } => {
8678                assert_eq!(columns, vec!["low"]);
8679                assert_eq!(rows.len(), 3);
8680                assert_eq!(rows[0][0], Value::Str("alice@ex.com".into()));
8681            }
8682            _ => panic!("expected rows"),
8683        }
8684    }
8685
8686    #[test]
8687    fn test_length_in_projection() {
8688        let mut engine = test_engine();
8689        let result = engine
8690            .execute_powql("User { .name, len: length(.name) }")
8691            .unwrap();
8692        match result {
8693            QueryResult::Rows { columns, rows } => {
8694                assert_eq!(columns, vec!["name", "len"]);
8695                assert_eq!(rows[0][1], Value::Int(5));
8696                assert_eq!(rows[1][1], Value::Int(3));
8697                assert_eq!(rows[2][1], Value::Int(7));
8698            }
8699            _ => panic!("expected rows"),
8700        }
8701    }
8702
8703    #[test]
8704    fn test_substring_in_projection() {
8705        let mut engine = test_engine();
8706        let result = engine
8707            .execute_powql("User { sub: substring(.name, 1, 3) }")
8708            .unwrap();
8709        match result {
8710            QueryResult::Rows { rows, .. } => {
8711                assert_eq!(rows[0][0], Value::Str("Ali".into()));
8712                assert_eq!(rows[1][0], Value::Str("Bob".into()));
8713                assert_eq!(rows[2][0], Value::Str("Cha".into()));
8714            }
8715            _ => panic!("expected rows"),
8716        }
8717    }
8718
8719    #[test]
8720    fn test_concat_in_projection() {
8721        let mut engine = test_engine();
8722        let result = engine
8723            .execute_powql(r#"User { full: concat(.name, " - ", .email) }"#)
8724            .unwrap();
8725        match result {
8726            QueryResult::Rows { rows, .. } => {
8727                assert_eq!(rows[0][0], Value::Str("Alice - alice@ex.com".into()));
8728                assert_eq!(rows[1][0], Value::Str("Bob - bob@ex.com".into()));
8729                assert_eq!(rows[2][0], Value::Str("Charlie - charlie@ex.com".into()));
8730            }
8731            _ => panic!("expected rows"),
8732        }
8733    }
8734
8735    #[test]
8736    fn test_concat_coerces_int() {
8737        let mut engine = test_engine();
8738        let result = engine
8739            .execute_powql(r#"User { info: concat(.name, " age=", .age) }"#)
8740            .unwrap();
8741        match result {
8742            QueryResult::Rows { rows, .. } => {
8743                assert_eq!(rows[0][0], Value::Str("Alice age=30".into()));
8744            }
8745            _ => panic!("expected rows"),
8746        }
8747    }
8748
8749    // ─── CASE WHEN tests ───────────────────────────────────────────────
8750
8751    #[test]
8752    fn test_case_in_projection() {
8753        let mut engine = test_engine();
8754        let result = engine.execute_powql(
8755            r#"User { .name, label: case when .age > 30 then "senior" when .age >= 30 then "exactly30" else "young" end }"#
8756        ).unwrap();
8757        match result {
8758            QueryResult::Rows { columns, rows } => {
8759                assert_eq!(columns, vec!["name", "label"]);
8760                assert_eq!(rows.len(), 3);
8761                for row in &rows {
8762                    let name = &row[0];
8763                    let label = &row[1];
8764                    match name {
8765                        Value::Str(n) if n == "Alice" => {
8766                            assert_eq!(label, &Value::Str("exactly30".into()))
8767                        }
8768                        Value::Str(n) if n == "Bob" => {
8769                            assert_eq!(label, &Value::Str("young".into()))
8770                        }
8771                        Value::Str(n) if n == "Charlie" => {
8772                            assert_eq!(label, &Value::Str("senior".into()))
8773                        }
8774                        _ => panic!("unexpected name: {name:?}"),
8775                    }
8776                }
8777            }
8778            _ => panic!("expected rows"),
8779        }
8780    }
8781
8782    #[test]
8783    fn test_case_in_filter() {
8784        let mut engine = test_engine();
8785        let result = engine
8786            .execute_powql(r#"User filter case when .age > 30 then true else false end"#)
8787            .unwrap();
8788        match result {
8789            QueryResult::Rows { rows, .. } => {
8790                assert_eq!(rows.len(), 1);
8791                assert_eq!(rows[0][0], Value::Str("Charlie".into()));
8792            }
8793            _ => panic!("expected rows"),
8794        }
8795    }
8796
8797    #[test]
8798    fn test_case_without_else_returns_empty() {
8799        let mut engine = test_engine();
8800        let result = engine
8801            .execute_powql(r#"User { .name, label: case when .age > 100 then "old" end }"#)
8802            .unwrap();
8803        match result {
8804            QueryResult::Rows { rows, .. } => {
8805                for row in &rows {
8806                    assert_eq!(row[1], Value::Empty);
8807                }
8808            }
8809            _ => panic!("expected rows"),
8810        }
8811    }
8812
8813    // ─── Mul/Div expression tests (E2f) ───────────────────────────────
8814
8815    #[test]
8816    fn test_mul_in_projection() {
8817        let mut engine = test_engine();
8818        let result = engine
8819            .execute_powql("User { .name, double_age: .age * 2 }")
8820            .unwrap();
8821        match result {
8822            QueryResult::Rows { columns, rows } => {
8823                assert_eq!(columns, vec!["name", "double_age"]);
8824                // Alice age=30 → 60, Bob age=25 → 50, Charlie age=35 → 70
8825                let ages: Vec<_> = rows.iter().map(|r| &r[1]).collect();
8826                assert!(ages.contains(&&Value::Int(60)));
8827                assert!(ages.contains(&&Value::Int(50)));
8828                assert!(ages.contains(&&Value::Int(70)));
8829            }
8830            _ => panic!("expected rows"),
8831        }
8832    }
8833
8834    #[test]
8835    fn test_div_in_filter() {
8836        let mut engine = test_engine();
8837        let result = engine.execute_powql("User filter .age / 10 > 2").unwrap();
8838        match result {
8839            QueryResult::Rows { rows, .. } => {
8840                // 30/10=3>2 ✓, 25/10=2 ✗, 35/10=3>2 ✓
8841                assert_eq!(rows.len(), 2);
8842            }
8843            _ => panic!("expected rows"),
8844        }
8845    }
8846
8847    // ─── Multi-column ORDER BY tests (E2f) ────────────────────────────
8848
8849    #[test]
8850    fn test_multi_order_by() {
8851        let mut engine = test_engine();
8852        // Insert another 30-year-old so we can test tiebreaker
8853        engine
8854            .execute_powql(r#"insert User { name := "Dave", email := "dave@ex.com", age := 30 }"#)
8855            .unwrap();
8856        let result = engine
8857            .execute_powql("User order .age asc, .name asc { .name, .age }")
8858            .unwrap();
8859        match result {
8860            QueryResult::Rows { rows, .. } => {
8861                // Expected: Bob(25), Alice(30), Dave(30), Charlie(35)
8862                assert_eq!(rows[0][0], Value::Str("Bob".into()));
8863                assert_eq!(rows[1][0], Value::Str("Alice".into()));
8864                assert_eq!(rows[2][0], Value::Str("Dave".into()));
8865                assert_eq!(rows[3][0], Value::Str("Charlie".into()));
8866            }
8867            _ => panic!("expected rows"),
8868        }
8869    }
8870
8871    #[test]
8872    fn test_multi_order_mixed_direction() {
8873        let mut engine = test_engine();
8874        engine
8875            .execute_powql(r#"insert User { name := "Dave", email := "dave@ex.com", age := 30 }"#)
8876            .unwrap();
8877        let result = engine
8878            .execute_powql("User order .age asc, .name desc { .name, .age }")
8879            .unwrap();
8880        match result {
8881            QueryResult::Rows { rows, .. } => {
8882                // Expected: Bob(25), Dave(30), Alice(30), Charlie(35)
8883                assert_eq!(rows[0][0], Value::Str("Bob".into()));
8884                assert_eq!(rows[1][0], Value::Str("Dave".into()));
8885                assert_eq!(rows[2][0], Value::Str("Alice".into()));
8886                assert_eq!(rows[3][0], Value::Str("Charlie".into()));
8887            }
8888            _ => panic!("expected rows"),
8889        }
8890    }
8891
8892    // ─── ALTER TABLE / DROP TABLE tests (E2g) ─────────────────────────
8893
8894    #[test]
8895    fn test_alter_add_column() {
8896        let mut engine = test_engine();
8897        let result = engine
8898            .execute_powql("alter User add column status: str")
8899            .unwrap();
8900        match result {
8901            QueryResult::Executed { message } => {
8902                assert!(message.contains("status"));
8903                assert!(message.contains("User"));
8904            }
8905            other => panic!("expected Executed, got {other:?}"),
8906        }
8907        // Verify schema was updated — new inserts can use the new column
8908        engine.execute_powql(r#"insert User { name := "Eve", email := "eve@ex.com", age := 22, status := "active" }"#).unwrap();
8909        let result = engine
8910            .execute_powql(r#"User filter .name = "Eve" { .name, .status }"#)
8911            .unwrap();
8912        match result {
8913            QueryResult::Rows { columns, rows } => {
8914                assert_eq!(columns, vec!["name", "status"]);
8915                assert_eq!(rows.len(), 1);
8916                assert_eq!(rows[0][1], Value::Str("active".into()));
8917            }
8918            other => panic!("expected rows, got {other:?}"),
8919        }
8920    }
8921
8922    #[test]
8923    fn test_alter_add_column_reads_old_rows() {
8924        // Regression: before the catalog rewrite path existed, rows
8925        // inserted before `alter ... add column` were left on disk
8926        // with the pre-alter variable-offset-table layout. A bare
8927        // `Type` scan then walked `decode_row` which read
8928        // `n_var + 1` offsets using the NEW schema and panicked with
8929        // "range end index X out of range for slice of length Y".
8930        //
8931        // This test reproduces that exactly: insert, alter, bare scan.
8932        // Any panic or wrong row count means the rewrite regressed.
8933        let mut engine = test_engine();
8934        engine
8935            .execute_powql("alter User add column country: str")
8936            .unwrap();
8937        // Bare scan: NO filter, so the planner cannot skip old rows.
8938        let result = engine.execute_powql("User").unwrap();
8939        match result {
8940            QueryResult::Rows { columns, rows } => {
8941                assert!(columns.contains(&"country".to_string()));
8942                assert_eq!(rows.len(), 3, "three old rows must still be readable");
8943                let country_idx = columns
8944                    .iter()
8945                    .position(|c| c == "country")
8946                    .expect("country column");
8947                for row in &rows {
8948                    assert_eq!(
8949                        row[country_idx],
8950                        Value::Empty,
8951                        "backfilled column must be Empty"
8952                    );
8953                }
8954            }
8955            other => panic!("expected rows, got {other:?}"),
8956        }
8957    }
8958
8959    #[test]
8960    fn test_alter_add_required_column_fails() {
8961        // Adding a required column to a non-empty table has no
8962        // default value to backfill with, so storing `Empty` would
8963        // silently violate the required invariant. The catalog must
8964        // reject it.
8965        let mut engine = test_engine();
8966        let err = engine
8967            .execute_powql("alter User add column required country: str")
8968            .expect_err("required-column add on non-empty table must fail");
8969        let msg = err.to_string().to_lowercase();
8970        assert!(
8971            msg.contains("required") || msg.contains("backfill"),
8972            "error should mention required/backfill, got: {err}"
8973        );
8974        // And the schema must NOT have silently gained the column.
8975        let result = engine.execute_powql("User").unwrap();
8976        if let QueryResult::Rows { columns, .. } = result {
8977            assert!(
8978                !columns.contains(&"country".to_string()),
8979                "failed alter must not mutate the schema"
8980            );
8981        }
8982    }
8983
#[test]
fn test_alter_add_column_then_update_old_row() {
    // Regression-plus: once the rewrite path has backfilled Empty into
    // the new column, updating that column on a pre-existing row has to
    // round-trip. The row was written with the old schema shape and is
    // encoded/decoded here with the new one.
    let mut db = test_engine();
    for stmt in [
        "alter User add column country: str",
        r#"User filter .name = "Alice" update { country := "US" }"#,
    ] {
        db.execute_powql(stmt).unwrap();
    }

    let alice_rows = match db
        .execute_powql(r#"User filter .name = "Alice" { .name, .country }"#)
        .unwrap()
    {
        QueryResult::Rows { rows, .. } => rows,
        other => panic!("expected rows, got {other:?}"),
    };
    assert_eq!(alice_rows.len(), 1);
    assert_eq!(alice_rows[0][0], Value::Str("Alice".into()));
    assert_eq!(alice_rows[0][1], Value::Str("US".into()));

    // The rows that were not updated must still decode, with Empty in
    // the added column.
    let (columns, rows) = match db.execute_powql("User").unwrap() {
        QueryResult::Rows { columns, rows } => (columns, rows),
        other => panic!("expected rows, got {other:?}"),
    };
    assert_eq!(rows.len(), 3);
    let country_idx = columns
        .iter()
        .position(|c| c == "country")
        .expect("country column");
    let empties = rows
        .iter()
        .map(|r| &r[country_idx])
        .filter(|v| **v == Value::Empty)
        .count();
    assert_eq!(
        empties, 2,
        "two unchanged old rows must still read as Empty"
    );
}
9031
#[test]
fn test_alter_drop_column() {
    // Dropping a column must (a) keep the surviving columns queryable
    // with every row intact and (b) remove the column from the schema.
    let mut engine = test_engine();
    engine
        .execute_powql("alter User drop column email")
        .unwrap();

    // (a) Surviving columns still project and no rows were lost.
    match engine.execute_powql("User { .name, .age }").unwrap() {
        QueryResult::Rows { columns, rows } => {
            assert_eq!(columns, vec!["name", "age"]);
            assert_eq!(rows.len(), 3);
        }
        other => panic!("expected rows, got {other:?}"),
    }

    // (b) An unprojected scan reports the full schema; the dropped
    // column must no longer be part of it. Without this check the test
    // would pass even if the drop were a no-op.
    match engine.execute_powql("User").unwrap() {
        QueryResult::Rows { columns, rows } => {
            assert!(
                !columns.contains(&"email".to_string()),
                "dropped column must not remain in the schema, got: {columns:?}"
            );
            assert_eq!(rows.len(), 3);
        }
        other => panic!("expected rows, got {other:?}"),
    }
}
9047
#[test]
fn test_drop_table() {
    // `drop <Type>` reports success in its message and makes the table
    // unqueryable afterwards.
    let mut db = test_engine();
    let message = match db.execute_powql("drop User").unwrap() {
        QueryResult::Executed { message } => message,
        other => panic!("expected Executed, got {other:?}"),
    };
    assert!(message.contains("User"));
    assert!(message.contains("dropped"));

    // A scan of the dropped table has to be rejected.
    let after_drop = db.execute_powql("User");
    assert!(after_drop.is_err());
}
9062
#[test]
fn test_drop_nonexistent_table_errors() {
    // Dropping a type that was never declared is an error, not a no-op.
    let mut db = test_engine();
    let outcome = db.execute_powql("drop NonExistent");
    assert!(outcome.is_err());
}
9068
#[test]
fn test_alter_add_duplicate_column_errors() {
    // `name` is already a column of User, so this alter must be rejected.
    let mut db = test_engine();
    let outcome = db.execute_powql("alter User add name: str");
    assert!(outcome.is_err());
}
9074
#[test]
fn test_alter_drop_nonexistent_column_errors() {
    // Dropping a column that does not exist on the type must fail.
    let mut db = test_engine();
    let outcome = db.execute_powql("alter User drop column nonexistent");
    assert!(outcome.is_err());
}
9082
#[test]
fn test_alter_add_index_creates_index() {
    // Adding an index reports the qualified column name and leaves
    // equality lookups on that column working.
    let mut db = test_engine();
    let message = match db.execute_powql("alter User add index .email").unwrap() {
        QueryResult::Executed { message } => message,
        other => panic!("expected Executed, got {other:?}"),
    };
    assert!(message.contains("User.email"), "message: {message}");

    // Equality lookup on the indexed column should still return results.
    let rows = match db
        .execute_powql(r#"User filter .email = "alice@ex.com" { .name }"#)
        .unwrap()
    {
        QueryResult::Rows { rows, .. } => rows,
        other => panic!("expected rows, got {other:?}"),
    };
    assert_eq!(rows.len(), 1);
    assert_eq!(rows[0][0], Value::Str("Alice".into()));
}
9105
#[test]
fn test_parse_rejects_trailing_tokens() {
    // `User create_index .email` used to succeed silently as a plain
    // `User` scan, dropping the unrecognized trailing tokens. Each of
    // these must now surface as an error so the caller learns the
    // syntax is not supported.
    let mut db = test_engine();
    for bad_query in [
        "User create_index .email",
        "User add_column score: int",
        "User drop_column email",
    ] {
        assert!(db.execute_powql(bad_query).is_err());
    }
}
9116
9117    // ─── IN subquery tests (E2h) ─────────────────────────────────────
9118
#[test]
fn test_in_subquery_basic() {
    let mut db = test_engine();
    // Side table holding a subset of the user names.
    for stmt in [
        "type VIP { required name: str }",
        r#"insert VIP { name := "Alice" }"#,
        r#"insert VIP { name := "Charlie" }"#,
    ] {
        db.execute_powql(stmt).unwrap();
    }

    let rows = match db
        .execute_powql("User filter .name in (VIP { .name }) { .name, .age }")
        .unwrap()
    {
        QueryResult::Rows { rows, .. } => rows,
        _ => panic!("expected rows"),
    };
    // Only the users whose name appears in VIP survive the filter.
    assert_eq!(rows.len(), 2);
    let names: Vec<_> = rows.iter().map(|r| &r[0]).collect();
    assert!(names.iter().any(|v| **v == Value::Str("Alice".into())));
    assert!(names.iter().any(|v| **v == Value::Str("Charlie".into())));
}
9146
#[test]
fn test_not_in_subquery() {
    // `not in` keeps exactly the users whose name is absent from VIP.
    let mut db = test_engine();
    for stmt in [
        "type VIP { required name: str }",
        r#"insert VIP { name := "Alice" }"#,
        r#"insert VIP { name := "Charlie" }"#,
    ] {
        db.execute_powql(stmt).unwrap();
    }

    let rows = match db
        .execute_powql("User filter .name not in (VIP { .name }) { .name }")
        .unwrap()
    {
        QueryResult::Rows { rows, .. } => rows,
        _ => panic!("expected rows"),
    };
    assert_eq!(rows.len(), 1);
    assert_eq!(rows[0][0], Value::Str("Bob".into()));
}
9171
#[test]
fn test_in_subquery_with_filter() {
    let mut db = test_engine();
    for stmt in [
        "type Score { required name: str, required points: int }",
        r#"insert Score { name := "Alice", points := 100 }"#,
        r#"insert Score { name := "Bob", points := 50 }"#,
        r#"insert Score { name := "Charlie", points := 80 }"#,
    ] {
        db.execute_powql(stmt).unwrap();
    }

    // The subquery carries its own filter: only names with points > 70
    // feed the IN list.
    let rows = match db
        .execute_powql("User filter .name in (Score filter .points > 70 { .name }) { .name }")
        .unwrap()
    {
        QueryResult::Rows { rows, .. } => rows,
        _ => panic!("expected rows"),
    };
    assert_eq!(rows.len(), 2);
    let names: Vec<_> = rows.iter().map(|r| &r[0]).collect();
    assert!(names.iter().any(|v| **v == Value::Str("Alice".into())));
    assert!(names.iter().any(|v| **v == Value::Str("Charlie".into())));
}
9202
9203    // ─── EXISTS subquery tests (uncorrelated) ───────────────────────────
9204
#[test]
fn test_exists_subquery_uncorrelated_true() {
    let mut db = test_engine();
    // With at least one row in the side table, EXISTS(...) is true and
    // the filter lets every User row through.
    for stmt in [
        "type VIP { required name: str }",
        r#"insert VIP { name := "Alice" }"#,
    ] {
        db.execute_powql(stmt).unwrap();
    }

    let rows = match db
        .execute_powql("User filter exists (VIP) { .name }")
        .unwrap()
    {
        QueryResult::Rows { rows, .. } => rows,
        _ => panic!("expected rows"),
    };
    assert_eq!(rows.len(), 3, "all users should pass when EXISTS is true");
}
9227
#[test]
fn test_exists_subquery_uncorrelated_false() {
    let mut db = test_engine();
    // The side table is declared but left empty, so EXISTS(...) is
    // false and the filter rejects every User row.
    db.execute_powql("type VIP { required name: str }").unwrap();

    let rows = match db
        .execute_powql("User filter exists (VIP) { .name }")
        .unwrap()
    {
        QueryResult::Rows { rows, .. } => rows,
        _ => panic!("expected rows"),
    };
    assert_eq!(rows.len(), 0, "no rows should pass when EXISTS is false");
}
9246
#[test]
fn test_not_exists_subquery() {
    let mut db = test_engine();
    // Phase 1: VIP is empty, so NOT EXISTS is true and all rows pass.
    db.execute_powql("type VIP { required name: str }").unwrap();

    let before_insert = match db
        .execute_powql("User filter not exists (VIP) { .name }")
        .unwrap()
    {
        QueryResult::Rows { rows, .. } => rows,
        _ => panic!("expected rows"),
    };
    assert_eq!(before_insert.len(), 3);

    // Phase 2: once VIP has a row, NOT EXISTS flips to false and the
    // same query returns nothing.
    db.execute_powql(r#"insert VIP { name := "Alice" }"#)
        .unwrap();
    let after_insert = match db
        .execute_powql("User filter not exists (VIP) { .name }")
        .unwrap()
    {
        QueryResult::Rows { rows, .. } => rows,
        _ => panic!("expected rows"),
    };
    assert_eq!(after_insert.len(), 0);
}
9279
#[test]
fn test_exists_subquery_with_inner_filter() {
    let mut db = test_engine();
    // The subquery has a filter of its own; only rows satisfying that
    // inner predicate count toward EXISTS.
    for stmt in [
        "type Score { required name: str, required points: int }",
        r#"insert Score { name := "Alice", points := 100 }"#,
    ] {
        db.execute_powql(stmt).unwrap();
    }

    // The inner filter matches a row, so EXISTS is true and every user
    // passes.
    let rows = match db
        .execute_powql("User filter exists (Score filter .points > 50) { .name }")
        .unwrap()
    {
        QueryResult::Rows { rows, .. } => rows,
        _ => panic!("expected rows"),
    };
    assert_eq!(rows.len(), 3);
}
9301
#[test]
fn test_exists_subquery_with_inner_filter_no_match() {
    // Uses its own engine so the plan cache cannot collide with the
    // `> 50` query shape exercised by the sibling test.
    let mut db = test_engine();
    for stmt in [
        "type Score { required name: str, required points: int }",
        r#"insert Score { name := "Alice", points := 100 }"#,
    ] {
        db.execute_powql(stmt).unwrap();
    }

    // Nothing satisfies the inner filter, so EXISTS is false and no
    // user passes.
    let rows = match db
        .execute_powql("User filter exists (Score filter .points > 1000) { .name }")
        .unwrap()
    {
        QueryResult::Rows { rows, .. } => rows,
        _ => panic!("expected rows"),
    };
    assert_eq!(rows.len(), 0);
}
9323
9324    // ─── Materialized view tests ────────────────────────────────────────────
9325
#[test]
fn test_create_materialized_view() {
    // `materialize <Name> as <query>` acknowledges with the view name,
    // after which the view can be scanned like a table.
    let mut db = test_engine();
    let message = match db
        .execute_powql(r#"materialize OldUsers as User filter .age > 28"#)
        .unwrap()
    {
        QueryResult::Executed { message } => message,
        _ => panic!("expected Executed"),
    };
    assert!(message.contains("OldUsers"));

    // Query the view like a table.
    let rows = match db.execute_powql("OldUsers").unwrap() {
        QueryResult::Rows { rows, .. } => rows,
        _ => panic!("expected rows"),
    };
    assert_eq!(rows.len(), 2); // Alice (30) and Charlie (35)
}
9347
#[test]
fn test_view_auto_refresh_on_insert() {
    let mut db = test_engine();
    for stmt in [
        r#"materialize OldUsers as User filter .age > 28"#,
        // A new base-table row that satisfies the view predicate.
        r#"insert User { name := "Dave", email := "dave@ex.com", age := 40 }"#,
    ] {
        db.execute_powql(stmt).unwrap();
    }

    // Reading the view must pick up Dave without an explicit refresh.
    let rows = match db.execute_powql("OldUsers").unwrap() {
        QueryResult::Rows { rows, .. } => rows,
        _ => panic!("expected rows"),
    };
    assert_eq!(rows.len(), 3); // Alice, Charlie, Dave
}
9367
#[test]
fn test_view_auto_refresh_on_delete() {
    let mut db = test_engine();
    for stmt in [
        r#"materialize OldUsers as User filter .age > 28"#,
        // Remove Alice (age 30) from the base table.
        r#"User filter .name = "Alice" delete"#,
    ] {
        db.execute_powql(stmt).unwrap();
    }

    // The view refreshes on read: only Charlie is left.
    let rows = match db.execute_powql("OldUsers").unwrap() {
        QueryResult::Rows { rows, .. } => rows,
        _ => panic!("expected rows"),
    };
    assert_eq!(rows.len(), 1);
}
9387
#[test]
fn test_view_auto_refresh_on_update() {
    let mut db = test_engine();
    for stmt in [
        r#"materialize OldUsers as User filter .age > 28"#,
        // Raise Bob's age so he now satisfies the view predicate.
        r#"User filter .name = "Bob" update { age := 50 }"#,
    ] {
        db.execute_powql(stmt).unwrap();
    }

    let rows = match db.execute_powql("OldUsers").unwrap() {
        QueryResult::Rows { rows, .. } => rows,
        _ => panic!("expected rows"),
    };
    assert_eq!(rows.len(), 3); // Alice, Charlie, Bob
}
9406
#[test]
fn test_explicit_refresh() {
    let mut db = test_engine();
    for stmt in [
        r#"materialize OldUsers as User filter .age > 28"#,
        r#"insert User { name := "Eve", email := "eve@ex.com", age := 55 }"#,
    ] {
        db.execute_powql(stmt).unwrap();
    }

    // `refresh <View>` is acknowledged with a "refreshed" message.
    let message = match db.execute_powql("refresh OldUsers").unwrap() {
        QueryResult::Executed { message } => message,
        _ => panic!("expected Executed"),
    };
    assert!(message.contains("refreshed"));

    // The refreshed view now includes Eve.
    let rows = match db.execute_powql("OldUsers").unwrap() {
        QueryResult::Rows { rows, .. } => rows,
        _ => panic!("expected rows"),
    };
    assert_eq!(rows.len(), 3);
}
9433
#[test]
fn test_drop_view() {
    let mut db = test_engine();
    db.execute_powql(r#"materialize OldUsers as User filter .age > 28"#)
        .unwrap();

    let message = match db.execute_powql("drop view OldUsers").unwrap() {
        QueryResult::Executed { message } => message,
        _ => panic!("expected Executed"),
    };
    assert!(message.contains("dropped"));

    // Once dropped, the view name no longer resolves.
    let err = db.execute_powql("OldUsers").unwrap_err();
    assert!(err.contains("not found"));
}
9451
#[test]
fn test_view_with_projection() {
    // A view defined over a projection exposes only the projected
    // column, for every base row.
    let mut db = test_engine();
    db.execute_powql(r#"materialize UserNames as User { .name }"#)
        .unwrap();

    let (columns, rows) = match db.execute_powql("UserNames").unwrap() {
        QueryResult::Rows { columns, rows } => (columns, rows),
        _ => panic!("expected rows"),
    };
    assert_eq!(columns, vec!["name".to_string()]);
    assert_eq!(rows.len(), 3);
}
9467
#[test]
fn test_view_no_stale_reads() {
    let mut db = test_engine();
    db.execute_powql(r#"materialize AllUsers as User"#)
        .unwrap();

    // Baseline: the view mirrors the three seeded users.
    let initial = match db.execute_powql("AllUsers").unwrap() {
        QueryResult::Rows { rows, .. } => rows,
        _ => panic!("expected rows"),
    };
    assert_eq!(initial.len(), 3);

    // Two inserts back to back: the first marks the view dirty and the
    // second leaves it dirty. The refresh happens on the next read.
    for stmt in [
        r#"insert User { name := "D", email := "d@ex.com", age := 1 }"#,
        r#"insert User { name := "E", email := "e@ex.com", age := 2 }"#,
    ] {
        db.execute_powql(stmt).unwrap();
    }

    let refreshed = match db.execute_powql("AllUsers").unwrap() {
        QueryResult::Rows { rows, .. } => rows,
        _ => panic!("expected rows"),
    };
    assert_eq!(refreshed.len(), 5);
}
9494
#[test]
fn test_duplicate_view_creation_fails() {
    // Materializing the same view name twice must fail the second time
    // with an "already exists" error.
    let mut db = test_engine();
    let create_view = r#"materialize V as User"#;
    db.execute_powql(create_view).unwrap();
    let err = db.execute_powql(create_view).unwrap_err();
    assert!(err.contains("already exists"));
}
9504
#[test]
fn test_drop_nonexistent_view_fails() {
    // Dropping an unknown view reports "not found" rather than succeeding.
    let mut db = test_engine();
    let outcome = db.execute_powql("drop view NoSuchView");
    let err = outcome.unwrap_err();
    assert!(err.contains("not found"));
}
9511
9512    // ── UNION / UNION ALL tests ────────────────────────────────
9513
#[test]
fn test_union_deduplicates() {
    // A = {alice, bob}, B = {bob, carol}; plain `union` collapses the
    // shared "bob" row.
    let mut db = test_engine();
    for stmt in [
        "type A { name: str }",
        "type B { name: str }",
        r#"insert A { name := "alice" }"#,
        r#"insert A { name := "bob" }"#,
        r#"insert B { name := "bob" }"#,
        r#"insert B { name := "carol" }"#,
    ] {
        db.execute_powql(stmt).unwrap();
    }

    match db.execute_powql("A union B").unwrap() {
        // alice, bob, carol — bob deduped
        QueryResult::Rows { rows, .. } => assert_eq!(rows.len(), 3),
        _ => panic!(),
    }
}
9539
#[test]
fn test_union_all_keeps_duplicates() {
    // X = {1, 2}, Y = {2, 3}; `union all` concatenates without removing
    // the shared value.
    let mut db = test_engine();
    for stmt in [
        "type X { val: int }",
        "type Y { val: int }",
        "insert X { val := 1 }",
        "insert X { val := 2 }",
        "insert Y { val := 2 }",
        "insert Y { val := 3 }",
    ] {
        db.execute_powql(stmt).unwrap();
    }

    match db.execute_powql("X union all Y").unwrap() {
        // 1, 2, 2, 3 — no dedup
        QueryResult::Rows { rows, .. } => assert_eq!(rows.len(), 4),
        _ => panic!(),
    }
}
9557
#[test]
fn test_union_with_filters() {
    // Both union branches scan the same table with different filters;
    // the two "eng" rows plus the one "sales" row give three results.
    let mut db = test_engine();
    for stmt in [
        "type Emp { name: str, dept: str }",
        r#"insert Emp { name := "alice", dept := "eng" }"#,
        r#"insert Emp { name := "bob", dept := "sales" }"#,
        r#"insert Emp { name := "carol", dept := "eng" }"#,
    ] {
        db.execute_powql(stmt).unwrap();
    }

    match db
        .execute_powql(r#"Emp filter .dept = "eng" union Emp filter .dept = "sales""#)
        .unwrap()
    {
        QueryResult::Rows { rows, .. } => assert_eq!(rows.len(), 3),
        _ => panic!(),
    }
}
9582
#[test]
fn test_union_chain_three_tables() {
    // `union` can be chained: three single-row tables with distinct
    // values yield three rows.
    let mut db = test_engine();
    for stmt in [
        "type T1 { v: int }",
        "type T2 { v: int }",
        "type T3 { v: int }",
        "insert T1 { v := 1 }",
        "insert T2 { v := 2 }",
        "insert T3 { v := 3 }",
    ] {
        db.execute_powql(stmt).unwrap();
    }

    match db.execute_powql("T1 union T2 union T3").unwrap() {
        QueryResult::Rows { rows, .. } => assert_eq!(rows.len(), 3),
        _ => panic!(),
    }
}
9599
#[test]
fn test_union_uses_left_side_columns() {
    // Checks the column header reported for a union of two
    // single-column tables, along with the combined row count.
    let mut db = test_engine();
    for stmt in [
        "type L { name: str }",
        "type R { name: str }",
        r#"insert L { name := "a" }"#,
        r#"insert R { name := "b" }"#,
    ] {
        db.execute_powql(stmt).unwrap();
    }

    let (columns, rows) = match db.execute_powql("L union R").unwrap() {
        QueryResult::Rows { columns, rows } => (columns, rows),
        _ => panic!("expected rows"),
    };
    assert_eq!(columns, vec!["name".to_string()]);
    assert_eq!(rows.len(), 2);
}
9616
9617    // ── COUNT DISTINCT tests ───────────────────────────────────
9618
#[test]
fn test_count_distinct_standalone() {
    // Four rows with "red" repeated once; the distinct count is 3.
    let mut db = test_engine();
    for stmt in [
        "type Color { name: str }",
        r#"insert Color { name := "red" }"#,
        r#"insert Color { name := "blue" }"#,
        r#"insert Color { name := "red" }"#,
        r#"insert Color { name := "green" }"#,
    ] {
        db.execute_powql(stmt).unwrap();
    }

    let distinct = match db
        .execute_powql("count(distinct Color { .name })")
        .unwrap()
    {
        QueryResult::Scalar(Value::Int(n)) => n,
        _ => panic!("expected scalar int"),
    };
    assert_eq!(distinct, 3); // red, blue, green
}
9643
#[test]
fn test_count_distinct_in_group_by() {
    let mut db = test_engine();
    for stmt in [
        "type Sale { dept: str, item: str }",
        r#"insert Sale { dept := "eng", item := "laptop" }"#,
        r#"insert Sale { dept := "eng", item := "laptop" }"#,
        r#"insert Sale { dept := "eng", item := "monitor" }"#,
        r#"insert Sale { dept := "sales", item := "phone" }"#,
    ] {
        db.execute_powql(stmt).unwrap();
    }

    let rows = match db
        .execute_powql("Sale group .dept { .dept, count(distinct .item) }")
        .unwrap()
    {
        QueryResult::Rows { rows, .. } => rows,
        _ => panic!(),
    };

    // Locate each department's group row first, then compare counts:
    // eng has 2 distinct items (laptop, monitor), sales has 1 (phone).
    let eng_at = rows
        .iter()
        .position(|r| r[0] == Value::Str("eng".into()))
        .unwrap();
    let sales_at = rows
        .iter()
        .position(|r| r[0] == Value::Str("sales".into()))
        .unwrap();
    assert_eq!(rows[eng_at][1], Value::Int(2));
    assert_eq!(rows[sales_at][1], Value::Int(1));
}
9681
#[test]
fn test_count_distinct_with_filter() {
    // Runs against the User table from test_engine (name, email, age).
    // NOTE(review): despite the test name, the query below applies no
    // filter; it counts distinct ages over all users.
    let mut db = test_engine();
    db.execute_powql(r#"insert User { name := "Dave", email := "d@e.com", age := 30 }"#)
        .unwrap();

    let distinct_ages = match db
        .execute_powql("count(distinct User { .age })")
        .unwrap()
    {
        QueryResult::Scalar(Value::Int(n)) => n,
        _ => panic!("expected scalar int"),
    };
    // 30(alice), 25(bob), 35(charlie), 30(dave) → 3 distinct
    assert_eq!(distinct_ages, 3);
}
9700
9701    // ── UPDATE with expressions tests ──────────────────────────
9702
#[test]
fn test_update_with_arithmetic_expression() {
    // The update's right-hand side reads the row's current value:
    // Alice starts at 30 and `.age + 5` must store 35.
    let mut db = test_engine();
    db.execute_powql(r#"User filter .name = "Alice" update { age := .age + 5 }"#)
        .unwrap();

    match db
        .execute_powql(r#"User filter .name = "Alice""#)
        .unwrap()
    {
        QueryResult::Rows { rows, .. } => {
            assert_eq!(rows[0][2], Value::Int(35)); // 30 + 5 = 35
        }
        _ => panic!(),
    }
}
9719
#[test]
fn test_update_with_multiply_expression() {
    // An unfiltered update with an expression applies to every row:
    // each user's age is doubled.
    let mut engine = test_engine();
    engine
        .execute_powql("User update { age := .age * 2 }")
        .unwrap();

    let rows = match engine.execute_powql("User").unwrap() {
        QueryResult::Rows { rows, .. } => rows,
        other => panic!("expected rows, got {other:?}"),
    };

    // Column 2 is `age`. A non-Int value is a failure in its own right,
    // so it panics here instead of being mapped to a placeholder that
    // the assertions below might not notice.
    let mut ages: Vec<i64> = rows
        .iter()
        .map(|r| match &r[2] {
            Value::Int(v) => *v,
            other => panic!("expected Int age, got {other:?}"),
        })
        .collect();

    // Row order is not asserted; sort and compare the whole set so
    // missing, extra, or duplicated rows are caught as well.
    ages.sort_unstable();
    // Bob: 25*2, Alice: 30*2, Charlie: 35*2
    assert_eq!(ages, vec![50, 60, 70]);
}
9743
#[test]
fn test_update_expression_with_filter() {
    // The expression update is restricted by a filter: only users older
    // than 28 get their age incremented.
    let mut db = test_engine();
    db.execute_powql("User filter .age > 28 update { age := .age + 1 }")
        .unwrap();

    match db
        .execute_powql(r#"User filter .name = "Alice""#)
        .unwrap()
    {
        QueryResult::Rows { rows, .. } => {
            assert_eq!(rows[0][2], Value::Int(31)); // Alice was 30, now 31
        }
        _ => panic!(),
    }

    match db.execute_powql(r#"User filter .name = "Bob""#).unwrap() {
        QueryResult::Rows { rows, .. } => {
            assert_eq!(rows[0][2], Value::Int(25)); // Bob was 25, unchanged
        }
        _ => panic!(),
    }
}
9768
#[test]
fn test_update_literal_still_uses_fast_path() {
    // Guards the plain-literal assignment form after the refactor that
    // introduced expression updates: the stored value must be the literal.
    let mut db = test_engine();
    db.execute_powql(r#"User filter .name = "Alice" update { age := 99 }"#)
        .unwrap();

    match db
        .execute_powql(r#"User filter .name = "Alice""#)
        .unwrap()
    {
        QueryResult::Rows { rows, .. } => assert_eq!(rows[0][2], Value::Int(99)),
        _ => panic!(),
    }
}
9785
9786    // ── COUNT(*) in GROUP BY tests ─────────────────────────────
9787
#[test]
fn test_group_by_count_star() {
    let mut db = test_engine();
    // test_engine seeds Alice(30), Bob(25), Charlie(35); Dave shares
    // Alice's age so the age-30 group has two members.
    db.execute_powql(r#"insert User { name := "Dave", email := "d@e.com", age := 30 }"#)
        .unwrap();

    let rows = match db
        .execute_powql("User group .age { .age, count(*) }")
        .unwrap()
    {
        QueryResult::Rows { rows, .. } => rows,
        _ => panic!(),
    };

    let at_30 = rows.iter().position(|r| r[0] == Value::Int(30)).unwrap();
    assert_eq!(rows[at_30][1], Value::Int(2)); // Alice + Dave
    let at_25 = rows.iter().position(|r| r[0] == Value::Int(25)).unwrap();
    assert_eq!(rows[at_25][1], Value::Int(1)); // Bob only
}
9808
#[test]
fn test_group_by_count_star_with_having() {
    // `having count(*) > 1` must keep only groups with more than one
    // member. After adding Dave (age 30) next to Alice (age 30), the
    // age-30 group is the only one that qualifies.
    let mut engine = test_engine();
    engine
        .execute_powql(r#"insert User { name := "Dave", email := "d@e.com", age := 30 }"#)
        .unwrap();

    let rows = match engine
        .execute_powql("User group .age having count(*) > 1 { .age, count(*) }")
        .unwrap()
    {
        QueryResult::Rows { rows, .. } => rows,
        other => panic!("expected rows, got {other:?}"),
    };

    assert_eq!(rows.len(), 1);
    assert_eq!(rows[0][0], Value::Int(30)); // only age=30 has count > 1
    // The projected aggregate has to survive the HAVING stage with the
    // right value, not just the group key.
    assert_eq!(rows[0][1], Value::Int(2)); // Alice + Dave
}
9825
9826    // ── Mixed-type arithmetic (Int <-> Float) regression tests ─────────
9827
9828    /// Engine with a Product type containing price:float + stock:int.
9829    /// Exercises mixed numeric promotion in `eval_binop`.
9830    fn product_mix_engine() -> Engine {
9831        let id = TEST_COUNTER.fetch_add(1, Ordering::SeqCst);
9832        let dir =
9833            std::env::temp_dir().join(format!("powdb_product_mix_{}_{}", std::process::id(), id));
9834        let mut engine = Engine::new(&dir).unwrap();
9835        engine
9836            .execute_powql(
9837                "type Product { required name: str, required price: float, required stock: int }",
9838            )
9839            .unwrap();
9840        engine
9841            .execute_powql(r#"insert Product { name := "Apple",  price := 1.5, stock := 10 }"#)
9842            .unwrap();
9843        engine
9844            .execute_powql(r#"insert Product { name := "Banana", price := 0.25, stock := 4 }"#)
9845            .unwrap();
9846        engine
9847            .execute_powql(r#"insert Product { name := "Cherry", price := 2.0, stock := 3 }"#)
9848            .unwrap();
9849        engine
9850    }
9851
9852    fn as_float(v: &Value) -> f64 {
9853        match v {
9854            Value::Float(f) => *f,
9855            other => panic!("expected Float, got {other:?}"),
9856        }
9857    }
9858
9859    #[test]
9860    fn test_arith_float_times_int() {
9861        let mut engine = product_mix_engine();
9862        let result = engine
9863            .execute_powql("Product { .name, total: .price * .stock }")
9864            .unwrap();
9865        match result {
9866            QueryResult::Rows { columns, rows } => {
9867                assert_eq!(columns, vec!["name", "total"]);
9868                let mut by_name: std::collections::HashMap<String, f64> =
9869                    std::collections::HashMap::new();
9870                for row in &rows {
9871                    let name = match &row[0] {
9872                        Value::Str(s) => s.clone(),
9873                        _ => panic!(),
9874                    };
9875                    by_name.insert(name, as_float(&row[1]));
9876                }
9877                assert!((by_name["Apple"] - 15.0).abs() < 1e-9);
9878                assert!((by_name["Banana"] - 1.0).abs() < 1e-9);
9879                assert!((by_name["Cherry"] - 6.0).abs() < 1e-9);
9880            }
9881            _ => panic!("expected rows"),
9882        }
9883    }
9884
9885    #[test]
9886    fn test_arith_int_plus_float() {
9887        let mut engine = product_mix_engine();
9888        // stock:int + price:float → should promote to float
9889        let result = engine
9890            .execute_powql("Product { .name, bumped: .stock + .price }")
9891            .unwrap();
9892        match result {
9893            QueryResult::Rows { rows, .. } => {
9894                let mut by_name: std::collections::HashMap<String, f64> =
9895                    std::collections::HashMap::new();
9896                for row in &rows {
9897                    let name = match &row[0] {
9898                        Value::Str(s) => s.clone(),
9899                        _ => panic!(),
9900                    };
9901                    by_name.insert(name, as_float(&row[1]));
9902                }
9903                assert!((by_name["Apple"] - 11.5).abs() < 1e-9);
9904                assert!((by_name["Banana"] - 4.25).abs() < 1e-9);
9905                assert!((by_name["Cherry"] - 5.0).abs() < 1e-9);
9906            }
9907            _ => panic!("expected rows"),
9908        }
9909    }
9910
9911    #[test]
9912    fn test_arith_float_div_int() {
9913        let mut engine = product_mix_engine();
9914        let result = engine
9915            .execute_powql("Product { .name, unit: .price / .stock }")
9916            .unwrap();
9917        match result {
9918            QueryResult::Rows { rows, .. } => {
9919                let mut by_name: std::collections::HashMap<String, f64> =
9920                    std::collections::HashMap::new();
9921                for row in &rows {
9922                    let name = match &row[0] {
9923                        Value::Str(s) => s.clone(),
9924                        _ => panic!(),
9925                    };
9926                    by_name.insert(name, as_float(&row[1]));
9927                }
9928                assert!((by_name["Apple"] - 0.15).abs() < 1e-9);
9929                assert!((by_name["Banana"] - 0.0625).abs() < 1e-9);
9930                assert!((by_name["Cherry"] - (2.0 / 3.0)).abs() < 1e-9);
9931            }
9932            _ => panic!("expected rows"),
9933        }
9934    }
9935
9936    #[test]
9937    fn test_arith_int_minus_float() {
9938        let mut engine = product_mix_engine();
9939        let result = engine
9940            .execute_powql("Product { .name, delta: .stock - .price }")
9941            .unwrap();
9942        match result {
9943            QueryResult::Rows { rows, .. } => {
9944                let mut by_name: std::collections::HashMap<String, f64> =
9945                    std::collections::HashMap::new();
9946                for row in &rows {
9947                    let name = match &row[0] {
9948                        Value::Str(s) => s.clone(),
9949                        _ => panic!(),
9950                    };
9951                    by_name.insert(name, as_float(&row[1]));
9952                }
9953                assert!((by_name["Apple"] - 8.5).abs() < 1e-9);
9954                assert!((by_name["Banana"] - 3.75).abs() < 1e-9);
9955                assert!((by_name["Cherry"] - 1.0).abs() < 1e-9);
9956            }
9957            _ => panic!("expected rows"),
9958        }
9959    }
9960
9961    // Regression: sum() on a Float column must return the actual
9962    // floating-point sum, not Int(0). The old slow-path loops filtered
9963    // out Value::Float and only summed Ints, silently dropping every
9964    // value in a Float column.
9965    #[test]
9966    fn test_sum_float_scalar() {
9967        let mut engine = product_mix_engine();
9968        let result = engine.execute_powql("sum(Product { .price })").unwrap();
9969        match result {
9970            QueryResult::Scalar(v) => {
9971                // 1.5 + 0.25 + 2.0 = 3.75
9972                assert!(
9973                    (as_float(&v) - 3.75).abs() < 1e-9,
9974                    "expected 3.75, got {v:?}"
9975                );
9976            }
9977            _ => panic!("expected scalar result, got {result:?}"),
9978        }
9979    }
9980
9981    // Regression: sum() of a Float column inside a GROUP BY must work
9982    // the same way. compute_group_aggregate had the identical Int-only
9983    // bug as the scalar path.
9984    #[test]
9985    fn test_sum_float_group_by() {
9986        let id = TEST_COUNTER.fetch_add(1, Ordering::SeqCst);
9987        let dir =
9988            std::env::temp_dir().join(format!("powdb_sum_float_gb_{}_{}", std::process::id(), id));
9989        let mut engine = Engine::new(&dir).unwrap();
9990        engine
9991            .execute_powql("type Sale { required region: str, required amount: float }")
9992            .unwrap();
9993        engine
9994            .execute_powql(r#"insert Sale { region := "E", amount := 1.5 }"#)
9995            .unwrap();
9996        engine
9997            .execute_powql(r#"insert Sale { region := "E", amount := 2.25 }"#)
9998            .unwrap();
9999        engine
10000            .execute_powql(r#"insert Sale { region := "W", amount := 4.0 }"#)
10001            .unwrap();
10002        engine
10003            .execute_powql(r#"insert Sale { region := "W", amount := 0.5 }"#)
10004            .unwrap();
10005
10006        let result = engine
10007            .execute_powql("Sale group .region { .region, total: sum(.amount) }")
10008            .unwrap();
10009        match result {
10010            QueryResult::Rows { columns, rows } => {
10011                assert_eq!(columns, vec!["region", "total"]);
10012                let mut by_region: std::collections::HashMap<String, f64> =
10013                    std::collections::HashMap::new();
10014                for row in &rows {
10015                    let region = match &row[0] {
10016                        Value::Str(s) => s.clone(),
10017                        _ => panic!(),
10018                    };
10019                    by_region.insert(region, as_float(&row[1]));
10020                }
10021                assert!(
10022                    (by_region["E"] - 3.75).abs() < 1e-9,
10023                    "E: {:?}",
10024                    by_region.get("E")
10025                );
10026                assert!(
10027                    (by_region["W"] - 4.5).abs() < 1e-9,
10028                    "W: {:?}",
10029                    by_region.get("W")
10030                );
10031            }
10032            _ => panic!("expected rows, got {result:?}"),
10033        }
10034    }
10035
10036    // ─── Mission D10: Float fast-path parity ─────────────────────────────
10037    //
10038    // Prior to D10, three hot paths in the executor bailed on Float columns:
10039    //   1. `agg_single_col_fast` — sum/avg/min/max/count fell through to the
10040    //      generic row-decoding path (allocates Vec<Value> per row).
10041    //   2. `project_filter_sort_limit_fast` — top-N by Float column fell
10042    //      through the generic sort path.
10043    //   3. `compile_predicate` / `build_int_leaf` — WHERE on Float columns
10044    //      couldn't compile, so the whole filter walked Value::cmp.
10045    //
10046    // These tests exercise each Float fast path end-to-end, including NaN
10047    // handling via `total_cmp` (which matches `Value::Ord` so semantics are
10048    // identical between fast-path and generic-path reads).
10049
10050    /// Engine with a Price table: price:float, qty:int. Eight rows with a
10051    /// deliberate spread of values, a NaN, a negative, -0.0, and a null.
10052    /// The null exercises the bitmap-skip branch; NaN and -0.0 exercise
10053    /// the `total_cmp` invariant.
10054    fn float_fast_engine() -> Engine {
10055        let id = TEST_COUNTER.fetch_add(1, Ordering::SeqCst);
10056        let dir =
10057            std::env::temp_dir().join(format!("powdb_float_fast_{}_{}", std::process::id(), id));
10058        let mut engine = Engine::new(&dir).unwrap();
10059        engine
10060            .execute_powql("type Price { required name: str, price: float, required qty: int }")
10061            .unwrap();
10062        // Insertion order deliberately scrambled so top-N doesn't trivially
10063        // match insertion order.
10064        let rows = [
10065            ("a", "price := 1.5", "qty := 1"),
10066            ("b", "price := 0.25", "qty := 2"),
10067            ("c", "price := 2.0", "qty := 3"),
10068            ("d", "price := -3.5", "qty := 4"),
10069            ("e", "price := 10.0", "qty := 5"),
10070            ("f", "price := 0.5", "qty := 6"),
10071            ("g", "price := 100.0", "qty := 7"),
10072            ("h", "price := -0.0", "qty := 8"),
10073        ];
10074        for (name, price, qty) in rows {
10075            engine
10076                .execute_powql(&format!(
10077                    r#"insert Price {{ name := "{name}", {price}, {qty} }}"#
10078                ))
10079                .unwrap();
10080        }
10081        engine
10082    }
10083
10084    #[test]
10085    fn test_d10_agg_sum_float_fast_path() {
10086        let mut engine = float_fast_engine();
10087        let result = engine.execute_powql("sum(Price { .price })").unwrap();
10088        // 1.5 + 0.25 + 2.0 + -3.5 + 10.0 + 0.5 + 100.0 + -0.0 = 110.75
10089        match result {
10090            QueryResult::Scalar(v) => {
10091                assert!((as_float(&v) - 110.75).abs() < 1e-9, "got {v:?}");
10092            }
10093            _ => panic!("expected scalar, got {result:?}"),
10094        }
10095    }
10096
10097    #[test]
10098    fn test_d10_agg_avg_float_fast_path() {
10099        let mut engine = float_fast_engine();
10100        let result = engine.execute_powql("avg(Price { .price })").unwrap();
10101        // 110.75 / 8 = 13.84375
10102        match result {
10103            QueryResult::Scalar(v) => {
10104                assert!((as_float(&v) - 13.84375).abs() < 1e-9, "got {v:?}");
10105            }
10106            _ => panic!("expected scalar, got {result:?}"),
10107        }
10108    }
10109
10110    #[test]
10111    fn test_d10_agg_min_float_fast_path() {
10112        let mut engine = float_fast_engine();
10113        let result = engine.execute_powql("min(Price { .price })").unwrap();
10114        match result {
10115            QueryResult::Scalar(v) => {
10116                assert!((as_float(&v) - (-3.5)).abs() < 1e-9, "got {v:?}");
10117            }
10118            _ => panic!("expected scalar, got {result:?}"),
10119        }
10120    }
10121
10122    #[test]
10123    fn test_d10_agg_max_float_fast_path() {
10124        let mut engine = float_fast_engine();
10125        let result = engine.execute_powql("max(Price { .price })").unwrap();
10126        match result {
10127            QueryResult::Scalar(v) => {
10128                assert!((as_float(&v) - 100.0).abs() < 1e-9, "got {v:?}");
10129            }
10130            _ => panic!("expected scalar, got {result:?}"),
10131        }
10132    }
10133
10134    #[test]
10135    fn test_d10_agg_count_distinct_float_fast_path() {
10136        let mut engine = float_fast_engine();
10137        let result = engine
10138            .execute_powql("count(distinct Price { .price })")
10139            .unwrap();
10140        // All 8 prices are distinct (+0.0 isn't present; -0.0 is, and
10141        // distinct from every other value). Hash via to_bits so -0.0 and
10142        // +0.0 would count separately — matches Value::Hash.
10143        match result {
10144            QueryResult::Scalar(Value::Int(n)) => assert_eq!(n, 8, "got {n}"),
10145            _ => panic!("expected scalar int, got {result:?}"),
10146        }
10147    }
10148
10149    #[test]
10150    fn test_d10_agg_float_with_compiled_where() {
10151        // Exercises `build_float_leaf` — WHERE .price > 1.0 must compile,
10152        // and the Float fast path must use it to short-circuit rows.
10153        let mut engine = float_fast_engine();
10154        let result = engine
10155            .execute_powql("sum(Price filter .price > 1.0 { .price })")
10156            .unwrap();
10157        // Rows > 1.0: 1.5, 2.0, 10.0, 100.0 → sum = 113.5
10158        match result {
10159            QueryResult::Scalar(v) => {
10160                assert!((as_float(&v) - 113.5).abs() < 1e-9, "got {v:?}");
10161            }
10162            _ => panic!("expected scalar, got {result:?}"),
10163        }
10164    }
10165
10166    #[test]
10167    fn test_d10_agg_float_with_compiled_where_int_literal() {
10168        // Novel cross-type: WHERE .price > 1 (Int literal on Float column)
10169        // must still compile via build_float_leaf — the Int literal is
10170        // promoted to f64 at compile time so the hot loop only sees f64.
10171        let mut engine = float_fast_engine();
10172        let result = engine
10173            .execute_powql("sum(Price filter .price > 1 { .price })")
10174            .unwrap();
10175        match result {
10176            QueryResult::Scalar(v) => {
10177                assert!((as_float(&v) - 113.5).abs() < 1e-9, "got {v:?}");
10178            }
10179            _ => panic!("expected scalar, got {result:?}"),
10180        }
10181    }
10182
10183    #[test]
10184    fn test_d10_agg_float_with_reversed_literal() {
10185        // `100.0 > .price` (literal on LHS) must also compile. The
10186        // build_float_leaf flips the operator so the field is always LHS.
10187        let mut engine = float_fast_engine();
10188        let result = engine
10189            .execute_powql("count(Price filter 1.0 < .price { .price })")
10190            .unwrap();
10191        // Rows where 1.0 < .price: 1.5, 2.0, 10.0, 100.0 → count = 4
10192        match result {
10193            QueryResult::Scalar(Value::Int(n)) => assert_eq!(n, 4, "got {n}"),
10194            _ => panic!("expected scalar int, got {result:?}"),
10195        }
10196    }
10197
10198    #[test]
10199    fn test_d10_sort_float_desc_limit_fast_path() {
10200        // Top-3 by price descending — exercises the Float branch of
10201        // project_filter_sort_limit_fast with the sortable-u64 transform.
10202        let mut engine = float_fast_engine();
10203        let result = engine
10204            .execute_powql("Price order .price desc limit 3 { .name, .price }")
10205            .unwrap();
10206        match result {
10207            QueryResult::Rows { columns, rows } => {
10208                assert_eq!(columns, vec!["name", "price"]);
10209                assert_eq!(rows.len(), 3);
10210                assert_eq!(rows[0][0], Value::Str("g".into())); // 100.0
10211                assert!((as_float(&rows[0][1]) - 100.0).abs() < 1e-9);
10212                assert_eq!(rows[1][0], Value::Str("e".into())); // 10.0
10213                assert!((as_float(&rows[1][1]) - 10.0).abs() < 1e-9);
10214                assert_eq!(rows[2][0], Value::Str("c".into())); // 2.0
10215                assert!((as_float(&rows[2][1]) - 2.0).abs() < 1e-9);
10216            }
10217            _ => panic!("expected rows, got {result:?}"),
10218        }
10219    }
10220
10221    #[test]
10222    fn test_d10_sort_float_asc_limit_fast_path() {
10223        // Bottom-3 by price — negative and -0.0 must order correctly.
10224        let mut engine = float_fast_engine();
10225        let result = engine
10226            .execute_powql("Price order .price limit 3 { .name, .price }")
10227            .unwrap();
10228        match result {
10229            QueryResult::Rows { rows, .. } => {
10230                assert_eq!(rows.len(), 3);
10231                assert_eq!(rows[0][0], Value::Str("d".into())); // -3.5
10232                                                                // -0.0 must come before +0.25 under total_cmp ordering.
10233                assert_eq!(rows[1][0], Value::Str("h".into())); // -0.0
10234                assert_eq!(rows[2][0], Value::Str("b".into())); // 0.25
10235            }
10236            _ => panic!("expected rows, got {result:?}"),
10237        }
10238    }
10239
10240    #[test]
10241    fn test_d10_sort_float_with_compiled_filter() {
10242        // Filter + sort + limit all on Float column — every fast path
10243        // fires on the same query.
10244        let mut engine = float_fast_engine();
10245        let result = engine
10246            .execute_powql("Price filter .price > 0.0 order .price desc limit 2 { .name }")
10247            .unwrap();
10248        match result {
10249            QueryResult::Rows { rows, .. } => {
10250                assert_eq!(rows.len(), 2);
10251                assert_eq!(rows[0][0], Value::Str("g".into())); // 100.0
10252                assert_eq!(rows[1][0], Value::Str("e".into())); // 10.0
10253            }
10254            _ => panic!("expected rows, got {result:?}"),
10255        }
10256    }
10257
10258    #[test]
10259    fn test_f64_sortable_transform_monotonic() {
10260        // The sortable-u64 transform must preserve total_cmp ordering.
10261        // Regression guard against accidentally breaking the clever
10262        // sign-flip trick in `f64_bits_to_sortable_u64`.
10263        let samples: [f64; 11] = [
10264            f64::NEG_INFINITY,
10265            -1e100,
10266            -1.0,
10267            -f64::MIN_POSITIVE,
10268            -0.0,
10269            0.0,
10270            f64::MIN_POSITIVE,
10271            1.0,
10272            1e100,
10273            f64::INFINITY,
10274            f64::NAN, // total_cmp says NaN > +∞
10275        ];
10276        let mut sorted = samples;
10277        sorted.sort_by(|a, b| a.total_cmp(b));
10278
10279        let as_sortable: Vec<u64> = sorted
10280            .iter()
10281            .map(|f| f64_bits_to_sortable_u64(f.to_bits()))
10282            .collect();
10283
10284        // Each u64 must be strictly greater than its predecessor, because
10285        // `total_cmp` places every sample at a distinct total-order slot.
10286        for pair in as_sortable.windows(2) {
10287            assert!(
10288                pair[0] < pair[1],
10289                "sortable u64 not monotonic: {:#x} >= {:#x}",
10290                pair[0],
10291                pair[1]
10292            );
10293        }
10294    }
10295
10296    // ─── EXPLAIN tests ─────────────────────────────────────────────────
10297
10298    #[test]
10299    fn test_explain_simple_scan() {
10300        let mut engine = test_engine();
10301        let result = engine.execute_powql("explain User").unwrap();
10302        match result {
10303            QueryResult::Rows { columns, rows } => {
10304                assert_eq!(columns, vec!["plan"]);
10305                assert!(!rows.is_empty());
10306                assert!(matches!(&rows[0][0], Value::Str(s) if s.contains("SeqScan")));
10307            }
10308            _ => panic!("expected rows"),
10309        }
10310    }
10311
10312    #[test]
10313    fn test_explain_filter() {
10314        let mut engine = test_engine();
10315        let result = engine
10316            .execute_powql("explain User filter .age > 30")
10317            .unwrap();
10318        match result {
10319            QueryResult::Rows { rows, .. } => {
10320                let plan_text: String = rows
10321                    .iter()
10322                    .map(|r| match &r[0] {
10323                        Value::Str(s) => s.as_str(),
10324                        _ => "",
10325                    })
10326                    .collect::<Vec<_>>()
10327                    .join("\n");
10328                assert!(
10329                    plan_text.contains("Filter"),
10330                    "plan should show Filter(SeqScan) after lowering unindexed RangeScan"
10331                );
10332            }
10333            _ => panic!("expected rows"),
10334        }
10335    }
10336
10337    #[test]
10338    fn test_explain_does_not_execute() {
10339        let mut engine = test_engine();
10340        // EXPLAIN should NOT actually insert a row.
10341        let result = engine
10342            .execute_powql(r#"explain insert User { name := "Zara", age := 99 }"#)
10343            .unwrap();
10344        match result {
10345            QueryResult::Rows { rows, .. } => {
10346                let plan_text: String = rows
10347                    .iter()
10348                    .map(|r| match &r[0] {
10349                        Value::Str(s) => s.as_str(),
10350                        _ => "",
10351                    })
10352                    .collect::<Vec<_>>()
10353                    .join("\n");
10354                assert!(plan_text.contains("Insert"));
10355            }
10356            _ => panic!("expected rows"),
10357        }
10358        // Verify no row was actually inserted.
10359        let result = engine.execute_powql("User { .name }").unwrap();
10360        match result {
10361            QueryResult::Rows { rows, .. } => {
10362                assert_eq!(rows.len(), 3, "should still have original 3 users");
10363            }
10364            _ => panic!("expected rows"),
10365        }
10366    }
10367
10368    // ─── Correlated subquery tests ──────────────────────────────────────
10369
10370    #[test]
10371    fn test_correlated_in_subquery() {
10372        let mut engine = test_engine();
10373        // Create an orders table with user_name to correlate on.
10374        engine
10375            .execute_powql("type UserOrder { required user_name: str, required total: int }")
10376            .unwrap();
10377        engine
10378            .execute_powql(r#"insert UserOrder { user_name := "Alice", total := 100 }"#)
10379            .unwrap();
10380        engine
10381            .execute_powql(r#"insert UserOrder { user_name := "Alice", total := 200 }"#)
10382            .unwrap();
10383        engine
10384            .execute_powql(r#"insert UserOrder { user_name := "Bob", total := 50 }"#)
10385            .unwrap();
10386
10387        // Correlated: for each User row, find orders where user_name = outer .name
10388        // The subquery references .name which is a User column, not a UserOrder column.
10389        let result = engine.execute_powql(
10390            "User filter .name in (UserOrder filter .user_name = .name { .user_name }) { .name }"
10391        ).unwrap();
10392        match result {
10393            QueryResult::Rows { rows, .. } => {
10394                assert_eq!(rows.len(), 2, "Alice and Bob have orders");
10395                let names: Vec<_> = rows.iter().map(|r| &r[0]).collect();
10396                assert!(names.contains(&&Value::Str("Alice".into())));
10397                assert!(names.contains(&&Value::Str("Bob".into())));
10398            }
10399            _ => panic!("expected rows"),
10400        }
10401    }
10402
10403    #[test]
10404    fn test_correlated_exists_subquery() {
10405        let mut engine = test_engine();
10406        engine
10407            .execute_powql("type UserOrder { required user_name: str, required total: int }")
10408            .unwrap();
10409        engine
10410            .execute_powql(r#"insert UserOrder { user_name := "Alice", total := 100 }"#)
10411            .unwrap();
10412        engine
10413            .execute_powql(r#"insert UserOrder { user_name := "Bob", total := 50 }"#)
10414            .unwrap();
10415
10416        // Correlated EXISTS: only Users who have at least one order.
10417        // .name in the subquery filter refers to the outer User's name column.
10418        let result = engine
10419            .execute_powql("User filter exists (UserOrder filter .user_name = .name) { .name }")
10420            .unwrap();
10421        match result {
10422            QueryResult::Rows { rows, .. } => {
10423                assert_eq!(rows.len(), 2, "Alice and Bob have orders");
10424                let names: Vec<_> = rows.iter().map(|r| &r[0]).collect();
10425                assert!(names.contains(&&Value::Str("Alice".into())));
10426                assert!(names.contains(&&Value::Str("Bob".into())));
10427            }
10428            _ => panic!("expected rows"),
10429        }
10430    }
10431
10432    #[test]
10433    fn test_correlated_not_exists_subquery() {
10434        let mut engine = test_engine();
10435        engine
10436            .execute_powql("type UserOrder { required user_name: str, required total: int }")
10437            .unwrap();
10438        engine
10439            .execute_powql(r#"insert UserOrder { user_name := "Alice", total := 100 }"#)
10440            .unwrap();
10441
10442        // NOT EXISTS: Users without orders (Bob and Charlie).
10443        let result = engine
10444            .execute_powql("User filter not exists (UserOrder filter .user_name = .name) { .name }")
10445            .unwrap();
10446        match result {
10447            QueryResult::Rows { rows, .. } => {
10448                assert_eq!(rows.len(), 2, "Bob and Charlie have no orders");
10449                let names: Vec<_> = rows.iter().map(|r| &r[0]).collect();
10450                assert!(names.contains(&&Value::Str("Bob".into())));
10451                assert!(names.contains(&&Value::Str("Charlie".into())));
10452            }
10453            _ => panic!("expected rows"),
10454        }
10455    }
10456
10457    // ─── CAST tests ───────────────────────────────────────────────────
10458
10459    #[test]
10460    fn test_cast_int_to_str() {
10461        let mut engine = test_engine();
10462        let result = engine
10463            .execute_powql(r#"User { s: cast(.age, "str") }"#)
10464            .unwrap();
10465        match result {
10466            QueryResult::Rows { rows, .. } => {
10467                assert_eq!(rows[0][0], Value::Str("30".into()));
10468                assert_eq!(rows[1][0], Value::Str("25".into()));
10469            }
10470            _ => panic!("expected rows"),
10471        }
10472    }
10473
10474    #[test]
10475    fn test_cast_str_to_int() {
10476        let mut engine = test_engine();
10477        engine
10478            .execute_powql(r#"type Numbers { required val: str }"#)
10479            .unwrap();
10480        engine
10481            .execute_powql(r#"insert Numbers { val := "42" }"#)
10482            .unwrap();
10483        let result = engine
10484            .execute_powql(r#"Numbers { n: cast(.val, "int") }"#)
10485            .unwrap();
10486        match result {
10487            QueryResult::Rows { rows, .. } => {
10488                assert_eq!(rows[0][0], Value::Int(42));
10489            }
10490            _ => panic!("expected rows"),
10491        }
10492    }
10493
10494    #[test]
10495    fn test_cast_float_to_int() {
10496        let mut engine = test_engine();
10497        engine
10498            .execute_powql("type Floats { required val: float }")
10499            .unwrap();
10500        engine
10501            .execute_powql("insert Floats { val := 3.7 }")
10502            .unwrap();
10503        let result = engine
10504            .execute_powql(r#"Floats { n: cast(.val, "int") }"#)
10505            .unwrap();
10506        match result {
10507            QueryResult::Rows { rows, .. } => {
10508                assert_eq!(rows[0][0], Value::Int(3));
10509            }
10510            _ => panic!("expected rows"),
10511        }
10512    }
10513
10514    #[test]
10515    fn test_cast_int_to_float() {
10516        let mut engine = test_engine();
10517        let result = engine
10518            .execute_powql(r#"User { f: cast(.age, "float") }"#)
10519            .unwrap();
10520        match result {
10521            QueryResult::Rows { rows, .. } => {
10522                assert_eq!(rows[0][0], Value::Float(30.0));
10523            }
10524            _ => panic!("expected rows"),
10525        }
10526    }
10527
10528    #[test]
10529    fn test_cast_int_to_bool() {
10530        let mut engine = test_engine();
10531        let result = engine
10532            .execute_powql(r#"User { b: cast(.age, "bool") }"#)
10533            .unwrap();
10534        match result {
10535            QueryResult::Rows { rows, .. } => {
10536                // age=30 -> true (non-zero)
10537                assert_eq!(rows[0][0], Value::Bool(true));
10538            }
10539            _ => panic!("expected rows"),
10540        }
10541    }
10542
10543    // ─── Math function tests ──────────────────────────────────────────
10544
10545    #[test]
10546    fn test_abs() {
10547        let mut engine = test_engine();
10548        engine
10549            .execute_powql("type Nums { required val: int }")
10550            .unwrap();
10551        engine.execute_powql("insert Nums { val := -42 }").unwrap();
10552        let result = engine.execute_powql("Nums { a: abs(.val) }").unwrap();
10553        match result {
10554            QueryResult::Rows { rows, .. } => {
10555                assert_eq!(rows[0][0], Value::Int(42));
10556            }
10557            _ => panic!("expected rows"),
10558        }
10559    }
10560
10561    #[test]
10562    fn test_round() {
10563        let mut engine = test_engine();
10564        engine
10565            .execute_powql("type Floats { required val: float }")
10566            .unwrap();
10567        engine
10568            .execute_powql("insert Floats { val := 7.56789 }")
10569            .unwrap();
10570        let result = engine
10571            .execute_powql("Floats { r: round(.val, 2) }")
10572            .unwrap();
10573        match result {
10574            QueryResult::Rows { rows, .. } => {
10575                assert_eq!(rows[0][0], Value::Float(7.57));
10576            }
10577            _ => panic!("expected rows"),
10578        }
10579    }
10580
10581    #[test]
10582    fn test_ceil_floor() {
10583        let mut engine = test_engine();
10584        engine
10585            .execute_powql("type Floats { required val: float }")
10586            .unwrap();
10587        engine
10588            .execute_powql("insert Floats { val := 3.2 }")
10589            .unwrap();
10590        let c = engine.execute_powql("Floats { c: ceil(.val) }").unwrap();
10591        let f = engine.execute_powql("Floats { f: floor(.val) }").unwrap();
10592        match (c, f) {
10593            (QueryResult::Rows { rows: cr, .. }, QueryResult::Rows { rows: fr, .. }) => {
10594                assert_eq!(cr[0][0], Value::Float(4.0));
10595                assert_eq!(fr[0][0], Value::Float(3.0));
10596            }
10597            _ => panic!("expected rows"),
10598        }
10599    }
10600
10601    #[test]
10602    fn test_sqrt() {
10603        let mut engine = test_engine();
10604        engine
10605            .execute_powql("type Nums { required val: int }")
10606            .unwrap();
10607        engine.execute_powql("insert Nums { val := 144 }").unwrap();
10608        let result = engine.execute_powql("Nums { s: sqrt(.val) }").unwrap();
10609        match result {
10610            QueryResult::Rows { rows, .. } => {
10611                assert_eq!(rows[0][0], Value::Float(12.0));
10612            }
10613            _ => panic!("expected rows"),
10614        }
10615    }
10616
10617    #[test]
10618    fn test_pow() {
10619        let mut engine = test_engine();
10620        engine
10621            .execute_powql("type Nums { required val: int }")
10622            .unwrap();
10623        engine.execute_powql("insert Nums { val := 3 }").unwrap();
10624        let result = engine.execute_powql("Nums { p: pow(.val, 4) }").unwrap();
10625        match result {
10626            QueryResult::Rows { rows, .. } => {
10627                assert_eq!(rows[0][0], Value::Int(81));
10628            }
10629            _ => panic!("expected rows"),
10630        }
10631    }
10632
10633    // ─── Date/time function tests ─────────────────────────────────────
10634
10635    #[test]
10636    fn test_now_returns_datetime() {
10637        let mut engine = test_engine();
10638        engine
10639            .execute_powql("type Events { required name: str }")
10640            .unwrap();
10641        engine
10642            .execute_powql(r#"insert Events { name := "test" }"#)
10643            .unwrap();
10644        let result = engine.execute_powql("Events { ts: now() }").unwrap();
10645        match result {
10646            QueryResult::Rows { rows, .. } => match &rows[0][0] {
10647                Value::DateTime(m) => assert!(*m > 0, "now() should return positive timestamp"),
10648                other => panic!("expected DateTime, got {other:?}"),
10649            },
10650            _ => panic!("expected rows"),
10651        }
10652    }
10653
10654    #[test]
10655    fn test_extract_from_datetime() {
10656        let mut engine = test_engine();
10657        engine
10658            .execute_powql("type Events { required ts: datetime }")
10659            .unwrap();
10660        // 2024-01-15 12:30:45 UTC in microseconds
10661        // 2024-01-15 = 19737 days since epoch
10662        // 19737 * 86400 = 1705276800 seconds + 12*3600 + 30*60 + 45 = 1705321845
10663        // * 1_000_000 = 1705321845000000
10664        engine
10665            .execute_powql("insert Events { ts := 1705321845000000 }")
10666            .unwrap();
10667        let result = engine.execute_powql(r#"Events { y: extract("year", .ts), m: extract("month", .ts), d: extract("day", .ts), h: extract("hour", .ts) }"#).unwrap();
10668        match result {
10669            QueryResult::Rows { rows, .. } => {
10670                assert_eq!(rows[0][0], Value::Int(2024));
10671                assert_eq!(rows[0][1], Value::Int(1));
10672                assert_eq!(rows[0][2], Value::Int(15));
10673                assert_eq!(rows[0][3], Value::Int(12));
10674            }
10675            _ => panic!("expected rows"),
10676        }
10677    }
10678
10679    #[test]
10680    fn test_date_add() {
10681        let mut engine = test_engine();
10682        engine
10683            .execute_powql("type Events { required ts: datetime }")
10684            .unwrap();
10685        let base = 1705321845000000_i64; // 2024-01-15 12:30:45 UTC
10686        engine
10687            .execute_powql(&format!("insert Events {{ ts := {base} }}"))
10688            .unwrap();
10689        let result = engine
10690            .execute_powql(r#"Events { later: date_add(.ts, 2, "hours") }"#)
10691            .unwrap();
10692        match result {
10693            QueryResult::Rows { rows, .. } => {
10694                assert_eq!(rows[0][0], Value::DateTime(base + 2 * 3_600_000_000));
10695            }
10696            _ => panic!("expected rows"),
10697        }
10698    }
10699
10700    #[test]
10701    fn test_date_diff() {
10702        let mut engine = test_engine();
10703        engine
10704            .execute_powql("type Events { required start_ts: datetime, required end_ts: datetime }")
10705            .unwrap();
10706        let t1 = 1705321845000000_i64; // 2024-01-15 12:30:45 UTC
10707        let t2 = t1 + 3 * 86_400_000_000; // +3 days
10708        engine
10709            .execute_powql(&format!(
10710                "insert Events {{ start_ts := {t1}, end_ts := {t2} }}"
10711            ))
10712            .unwrap();
10713        let result = engine
10714            .execute_powql(r#"Events { diff: date_diff(.end_ts, .start_ts, "days") }"#)
10715            .unwrap();
10716        match result {
10717            QueryResult::Rows { rows, .. } => {
10718                assert_eq!(rows[0][0], Value::Int(3));
10719            }
10720            _ => panic!("expected rows"),
10721        }
10722    }
10723}