Skip to main content

spg_engine/
lib.rs

1//! SPG execution engine — v0.3 wires the SQL front-end to the in-memory
2//! storage layer. Implements `CREATE TABLE`, single-row `INSERT VALUES`, and
3//! `SELECT * FROM <table>` (no WHERE yet — that lands in v0.4 alongside
4//! expression evaluation against rows).
5#![no_std]
6
7extern crate alloc;
8
9pub mod aggregate;
10pub mod describe;
11pub mod eval;
12pub mod json;
13pub mod memoize;
14pub mod plan_cache;
15pub mod publications;
16pub mod query_stats;
17pub mod reorder;
18pub mod selectivity;
19pub mod statistics;
20pub mod subscriptions;
21pub mod users;
22
23pub use crate::users::{Role, ScramSecrets, UserError, UserStore};
24
25use alloc::borrow::Cow;
26use alloc::boxed::Box;
27use alloc::collections::BTreeMap;
28use alloc::string::{String, ToString};
29use alloc::vec::Vec;
30use core::fmt;
31
32use spg_sql::ast::{
33    BinOp, ColumnDef, ColumnName, ColumnTypeName, CreateIndexStatement, CreatePublicationStatement,
34    CreateSubscriptionStatement, CreateTableStatement, CreateUserStatement, Expr, FrameBound,
35    FrameKind, FromClause, IndexMethod, InsertStatement, JoinKind, Literal, OrderBy, SelectItem,
36    SelectStatement, Statement, TableRef, UnOp, UnionKind, VecEncoding as SqlVecEncoding,
37    WindowFrame,
38};
39use spg_sql::parser::{self, ParseError};
40use spg_storage::{
41    Catalog, ColumnSchema, CompactReport, DataType, IndexKey, IndexKind, Row, StorageError, Table,
42    TableSchema, Value, VecEncoding,
43};
44
45use crate::eval::{EvalContext, EvalError};
46
/// Result of executing one statement.
///
/// The enum is `#[non_exhaustive]`, so `match`es outside this crate need
/// a wildcard arm.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub enum QueryResult {
    /// DDL or DML succeeded.
    ///
    /// `affected` is the row count for `INSERT` and 0 elsewhere.
    /// `modified_catalog` tells the server whether this statement
    /// caused the *committed* catalog to change — it's the signal to
    /// snapshot/audit. False for `BEGIN`/`ROLLBACK`, false for writeful
    /// statements executed inside a transaction (those only touch the
    /// shadow), and true for `COMMIT` and for writes outside a TX.
    CommandOk {
        affected: usize,
        modified_catalog: bool,
    },
    /// `SELECT` returned a (possibly empty) row set.
    ///
    /// `columns` holds the `ColumnSchema` entries for the result and
    /// `rows` the result rows — presumably one schema per output column,
    /// in row order; confirm against the executor.
    Rows {
        columns: Vec<ColumnSchema>,
        rows: Vec<Row>,
    },
}
69
/// All errors the engine can return.
///
/// Marked `#[non_exhaustive]` from v7.5.0 onward: external `match`
/// must include a `_` arm so new variants in subsequent v7.x releases
/// are not breaking changes.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub enum EngineError {
    /// Wraps a `ParseError` from the SQL front-end (see the `From` impl).
    Parse(ParseError),
    /// Wraps a `StorageError` from the storage layer (see the `From` impl).
    Storage(StorageError),
    /// Wraps an `EvalError` from expression evaluation (see the `From` impl).
    Eval(EvalError),
    /// Front-end accepted a construct that the v0.x executor doesn't support.
    Unsupported(String),
    /// `BEGIN` while another transaction is already open.
    TransactionAlreadyOpen,
    /// `COMMIT` / `ROLLBACK` with no active transaction.
    NoActiveTransaction,
    /// v4.0 sentinel: `execute_readonly` got a statement that
    /// mutates engine state (INSERT / CREATE / BEGIN / COMMIT / …).
    /// The caller should retake the write lock and dispatch through
    /// `execute(&mut self)` instead.
    WriteRequired,
    /// v4.2: a SELECT would have returned more rows than the
    /// configured `max_query_rows` cap. Carries the cap.
    RowLimitExceeded(usize),
    /// v4.5: cooperative cancellation — the host (server's
    /// per-query watchdog) set the cancel flag while a long-running
    /// SELECT / UPDATE / DELETE was scanning rows. The partial work
    /// is discarded; the caller should surface this as a timeout
    /// to the client.
    Cancelled,
}
102
103impl fmt::Display for EngineError {
104    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
105        match self {
106            Self::Parse(e) => write!(f, "parse: {e}"),
107            Self::Storage(e) => write!(f, "storage: {e}"),
108            Self::Eval(e) => write!(f, "eval: {e}"),
109            Self::Unsupported(s) => write!(f, "unsupported: {s}"),
110            Self::TransactionAlreadyOpen => f.write_str("a transaction is already open"),
111            Self::NoActiveTransaction => f.write_str("no active transaction"),
112            Self::WriteRequired => {
113                f.write_str("statement requires a write lock (use execute, not execute_readonly)")
114            }
115            Self::RowLimitExceeded(n) => {
116                write!(f, "query exceeded max_query_rows={n}")
117            }
118            Self::Cancelled => f.write_str("query cancelled (timeout or client request)"),
119        }
120    }
121}
122
123impl From<ParseError> for EngineError {
124    fn from(e: ParseError) -> Self {
125        Self::Parse(e)
126    }
127}
128impl From<StorageError> for EngineError {
129    fn from(e: StorageError) -> Self {
130        Self::Storage(e)
131    }
132}
133impl From<EvalError> for EngineError {
134    fn from(e: EvalError) -> Self {
135        Self::Eval(e)
136    }
137}
138
// The execution engine. Holds the catalog and (later) other server-scope
// state. `Engine::new()` is intentionally cheap so callers can construct one
// per database, per test.
//
// NOTE(review): the paragraph above describes the `Engine` struct declared
// further down, not `ClockFn`. It is kept as a plain comment so rustdoc does
// not attach it to this alias.

/// Function pointer that returns "now" as microseconds since Unix
/// epoch. The engine is `no_std`, so it can't reach for `std::time`
/// itself — callers (`spg-server`, the sqllogictest runner) inject a
/// concrete implementation. `None` means `NOW()` / `CURRENT_*` raise
/// `Unsupported`.
pub type ClockFn = fn() -> i64;
148
149/// Function pointer that produces 16 cryptographically random bytes.
150/// Like `ClockFn`, the engine is `no_std` and can't reach for /dev/urandom
151/// itself — host (`spg-server`) injects an OS-backed source. `None`
152/// means SQL-driven `CREATE USER` falls back to a deterministic salt
153/// derived from the username (acceptable in tests; the server always
154/// installs a real RNG so production paths never see this).
155pub type SaltFn = fn() -> [u8; 16];
156
157/// v4.5 cooperative cancellation token. A long-running SELECT /
158/// UPDATE / DELETE checks `is_cancelled` at row-loop checkpoints
159/// and bails with `EngineError::Cancelled`. The host
160/// (`spg-server`) creates an `AtomicBool` per query, spawns a
161/// watchdog thread that sets it after `SPG_QUERY_TIMEOUT_MS`,
162/// and passes it via `execute_with_cancel` / `execute_readonly_with_cancel`.
163///
164/// `CancelToken::none()` is a no-op — used by the legacy `execute`
165/// and `execute_readonly` entry points so existing callers don't
166/// change.
167#[derive(Debug, Clone, Copy)]
168pub struct CancelToken<'a> {
169    flag: Option<&'a core::sync::atomic::AtomicBool>,
170}
171
172impl<'a> CancelToken<'a> {
173    #[must_use]
174    pub const fn none() -> Self {
175        Self { flag: None }
176    }
177
178    #[must_use]
179    pub const fn from_flag(f: &'a core::sync::atomic::AtomicBool) -> Self {
180        Self { flag: Some(f) }
181    }
182
183    #[must_use]
184    pub fn is_cancelled(self) -> bool {
185        self.flag
186            .is_some_and(|f| f.load(core::sync::atomic::Ordering::Relaxed))
187    }
188
189    /// Returns `Err(Cancelled)` if the token has been tripped.
190    /// Used at row-loop checkpoints to bail cooperatively without
191    /// scattering raw `is_cancelled` checks across the executor.
192    #[inline]
193    pub fn check(self) -> Result<(), EngineError> {
194        if self.is_cancelled() {
195            Err(EngineError::Cancelled)
196        } else {
197            Ok(())
198        }
199    }
200}
201
202// ---- snapshot envelope (v4.1, extended with CRC32 in v4.37,  ----
203// ----   publications in v6.1.2 v3, subscriptions in v6.1.4 v4) ----
204//
205// Wraps a catalog blob + a user blob behind a small header so the
206// server can persist both atomically without inventing a new file.
207// Bare catalog blobs (v3.x) still load via `restore_envelope` since
208// the magic check fails fast and the function falls back to
209// `Catalog::deserialize`.
210//
211// Layout — v1 (v4.1, no CRC):
212//   [8 bytes magic "SPGENV01"]
213//   [u8 version = 1]
214//   [u32 catalog_len][catalog bytes]
215//   [u32 users_len][users bytes]
216//
217// Layout — v2 (v4.37, CRC32 of body):
218//   [8 bytes magic "SPGENV01"]
219//   [u8 version = 2]
220//   [u32 catalog_len][catalog bytes]
221//   [u32 users_len][users bytes]
222//   [u32 crc32]                      ← CRC32 of every byte before it.
223//
224// Layout — v3 (v6.1.2, publications trailer):
225//   [8 bytes magic "SPGENV01"]
226//   [u8 version = 3]
227//   [u32 catalog_len][catalog bytes]
228//   [u32 users_len][users bytes]
229//   [u32 pubs_len][publications bytes]
230//   [u32 crc32]
231//
232// Layout — v4 (v6.1.4, subscriptions trailer):
233//   [8 bytes magic "SPGENV01"]
234//   [u8 version = 4]
235//   [u32 catalog_len][catalog bytes]
236//   [u32 users_len][users bytes]
237//   [u32 pubs_len][publications bytes]
238//   [u32 subs_len][subscriptions bytes]
239//   [u32 crc32]
240//
241// Layout — v5 (v6.2.0, statistics trailer):
242//   [8 bytes magic "SPGENV01"]
243//   [u8 version = 5]
244//   [u32 catalog_len][catalog bytes]
245//   [u32 users_len][users bytes]
246//   [u32 pubs_len][publications bytes]
247//   [u32 subs_len][subscriptions bytes]
248//   [u32 stats_len][statistics bytes]      ← NEW
249//   [u32 crc32]
250//
251// Writers emit v5 from v6.2.0 on. Readers accept all of {v1, v2,
252// v3, v4, v5}: v1/v2 load with empty publications / subscriptions /
253// statistics; v3 loads with empty subscriptions + statistics; v4
254// loads with empty statistics; v5 deserialises all three. Older
255// SPG versions reading a v5 envelope fall through the version
256// match to `EnvelopeParse::Bare` — pre-v6.2.0 binaries cannot
257// open v6.2.0+ snapshots (matches the v6.1.2 / v6.1.4 breaks).
258
259const ENVELOPE_MAGIC: &[u8; 8] = b"SPGENV01";
260const ENVELOPE_VERSION_V1: u8 = 1;
261const ENVELOPE_VERSION_V2: u8 = 2;
262const ENVELOPE_VERSION_V3: u8 = 3;
263const ENVELOPE_VERSION_V4: u8 = 4;
264const ENVELOPE_VERSION_V5: u8 = 5;
265
266fn build_envelope(catalog: &[u8], users: &[u8], pubs: &[u8], subs: &[u8], stats: &[u8]) -> Vec<u8> {
267    let mut out = Vec::with_capacity(
268        8 + 1
269            + 4
270            + catalog.len()
271            + 4
272            + users.len()
273            + 4
274            + pubs.len()
275            + 4
276            + subs.len()
277            + 4
278            + stats.len()
279            + 4,
280    );
281    out.extend_from_slice(ENVELOPE_MAGIC);
282    out.push(ENVELOPE_VERSION_V5);
283    out.extend_from_slice(
284        &u32::try_from(catalog.len())
285            .expect("≤ 4G catalog")
286            .to_le_bytes(),
287    );
288    out.extend_from_slice(catalog);
289    out.extend_from_slice(
290        &u32::try_from(users.len())
291            .expect("≤ 4G users")
292            .to_le_bytes(),
293    );
294    out.extend_from_slice(users);
295    out.extend_from_slice(
296        &u32::try_from(pubs.len())
297            .expect("≤ 4G publications")
298            .to_le_bytes(),
299    );
300    out.extend_from_slice(pubs);
301    out.extend_from_slice(
302        &u32::try_from(subs.len())
303            .expect("≤ 4G subscriptions")
304            .to_le_bytes(),
305    );
306    out.extend_from_slice(subs);
307    out.extend_from_slice(
308        &u32::try_from(stats.len())
309            .expect("≤ 4G statistics")
310            .to_le_bytes(),
311    );
312    out.extend_from_slice(stats);
313    let crc = spg_crypto::crc32::crc32(&out);
314    out.extend_from_slice(&crc.to_le_bytes());
315    out
316}
317
/// Outcome of envelope parsing: either bare-catalog fallback, the
/// successfully split sections of a v1–v5 envelope, or an explicit
/// corruption error from a CRC mismatch (v2 and later envelopes carry
/// a trailing CRC32). `Bare` (catalog-only fallback) preserves v3.x
/// readability. Optional sections are `None` when the envelope version
/// predates them: `publications` is present from v3, `subscriptions`
/// from v4, and `statistics` from v5.
enum EnvelopeParse<'a> {
    /// The buffer is not a recognisable envelope; the caller should
    /// treat it as a bare catalog blob.
    Bare,
    /// A well-formed envelope, split into borrowed section slices.
    Pair {
        catalog: &'a [u8],
        users: &'a [u8],
        publications: Option<&'a [u8]>,
        subscriptions: Option<&'a [u8]>,
        statistics: Option<&'a [u8]>,
    },
    /// The stored CRC32 (`expected`) differs from the CRC32 recomputed
    /// over the envelope body (`computed`).
    CrcMismatch {
        expected: u32,
        computed: u32,
    },
}
338
339/// Returns `EnvelopeParse::Pair` for a valid v1 / v2 / v3 envelope,
340/// `Bare` for a buffer that doesn't look like an envelope (v3.x
341/// bare catalog fallback), and `CrcMismatch` for a v2/v3 envelope
342/// whose trailing CRC32 doesn't match the body.
343fn split_envelope(buf: &[u8]) -> EnvelopeParse<'_> {
344    if buf.len() < 8 + 1 + 4 || &buf[..8] != ENVELOPE_MAGIC {
345        return EnvelopeParse::Bare;
346    }
347    let version = buf[8];
348    if !matches!(
349        version,
350        ENVELOPE_VERSION_V1
351            | ENVELOPE_VERSION_V2
352            | ENVELOPE_VERSION_V3
353            | ENVELOPE_VERSION_V4
354            | ENVELOPE_VERSION_V5
355    ) {
356        return EnvelopeParse::Bare;
357    }
358    let mut p = 9usize;
359    let Some(cat_len_bytes) = buf.get(p..p + 4) else {
360        return EnvelopeParse::Bare;
361    };
362    let Ok(cat_len_arr) = cat_len_bytes.try_into() else {
363        return EnvelopeParse::Bare;
364    };
365    let cat_len = u32::from_le_bytes(cat_len_arr) as usize;
366    p += 4;
367    if p + cat_len + 4 > buf.len() {
368        return EnvelopeParse::Bare;
369    }
370    let catalog = &buf[p..p + cat_len];
371    p += cat_len;
372    let Some(user_len_bytes) = buf.get(p..p + 4) else {
373        return EnvelopeParse::Bare;
374    };
375    let Ok(user_len_arr) = user_len_bytes.try_into() else {
376        return EnvelopeParse::Bare;
377    };
378    let user_len = u32::from_le_bytes(user_len_arr) as usize;
379    p += 4;
380    if p + user_len > buf.len() {
381        return EnvelopeParse::Bare;
382    }
383    let users = &buf[p..p + user_len];
384    p += user_len;
385    let publications = if matches!(
386        version,
387        ENVELOPE_VERSION_V3 | ENVELOPE_VERSION_V4 | ENVELOPE_VERSION_V5
388    ) {
389        // [u32 pubs_len][publications bytes]
390        let Some(pubs_len_bytes) = buf.get(p..p + 4) else {
391            return EnvelopeParse::Bare;
392        };
393        let Ok(pubs_len_arr) = pubs_len_bytes.try_into() else {
394            return EnvelopeParse::Bare;
395        };
396        let pubs_len = u32::from_le_bytes(pubs_len_arr) as usize;
397        p += 4;
398        if p + pubs_len > buf.len() {
399            return EnvelopeParse::Bare;
400        }
401        let pubs_slice = &buf[p..p + pubs_len];
402        p += pubs_len;
403        Some(pubs_slice)
404    } else {
405        None
406    };
407    let subscriptions = if matches!(version, ENVELOPE_VERSION_V4 | ENVELOPE_VERSION_V5) {
408        // [u32 subs_len][subscriptions bytes]
409        let Some(subs_len_bytes) = buf.get(p..p + 4) else {
410            return EnvelopeParse::Bare;
411        };
412        let Ok(subs_len_arr) = subs_len_bytes.try_into() else {
413            return EnvelopeParse::Bare;
414        };
415        let subs_len = u32::from_le_bytes(subs_len_arr) as usize;
416        p += 4;
417        if p + subs_len > buf.len() {
418            return EnvelopeParse::Bare;
419        }
420        let subs_slice = &buf[p..p + subs_len];
421        p += subs_len;
422        Some(subs_slice)
423    } else {
424        None
425    };
426    let statistics = if version == ENVELOPE_VERSION_V5 {
427        // [u32 stats_len][statistics bytes]
428        let Some(stats_len_bytes) = buf.get(p..p + 4) else {
429            return EnvelopeParse::Bare;
430        };
431        let Ok(stats_len_arr) = stats_len_bytes.try_into() else {
432            return EnvelopeParse::Bare;
433        };
434        let stats_len = u32::from_le_bytes(stats_len_arr) as usize;
435        p += 4;
436        if p + stats_len > buf.len() {
437            return EnvelopeParse::Bare;
438        }
439        let stats_slice = &buf[p..p + stats_len];
440        p += stats_len;
441        Some(stats_slice)
442    } else {
443        None
444    };
445    if matches!(
446        version,
447        ENVELOPE_VERSION_V2 | ENVELOPE_VERSION_V3 | ENVELOPE_VERSION_V4 | ENVELOPE_VERSION_V5
448    ) {
449        if p + 4 != buf.len() {
450            return EnvelopeParse::Bare;
451        }
452        let Ok(crc_arr) = buf[p..p + 4].try_into() else {
453            return EnvelopeParse::Bare;
454        };
455        let expected = u32::from_le_bytes(crc_arr);
456        let computed = spg_crypto::crc32::crc32(&buf[..p]);
457        if expected != computed {
458            return EnvelopeParse::CrcMismatch { expected, computed };
459        }
460    } else if p != buf.len() {
461        // v1: must end exactly at the users section.
462        return EnvelopeParse::Bare;
463    }
464    EnvelopeParse::Pair {
465        catalog,
466        users,
467        publications,
468        subscriptions,
469        statistics,
470    }
471}
472
473/// v4.41.1 opaque transaction handle. Returned by `Engine::alloc_tx_id`,
474/// threaded through `Engine::execute_in` so dispatch can identify which
475/// in-flight TX a statement belongs to. `IMPLICIT_TX` is the reserved
476/// slot every legacy caller — engine self-tests, spg-cli, spg-embedded,
477/// startup replay — implicitly uses through the unchanged
478/// `Engine::execute(sql)` API. v4.41.1 keeps at most one active slot at
479/// runtime (dispatch holds `engine.write()` across the wrap, same as
480/// v4.34); the map shape is here to let v4.42 turn on N in-flight
481/// implicit TXs without reshuffling the engine internals.
482#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
483pub struct TxId(pub u64);
484
485/// Reserved slot used by `Engine::execute(sql)` — the legacy single-
486/// global-shadow path. New `alloc_tx_id` handles start at 1.
487pub const IMPLICIT_TX: TxId = TxId(0);
488
489/// v6.7.3 — default segment-size threshold used by `COMPACT COLD
490/// SEGMENTS` when no explicit target is supplied. Segments whose
491/// `OwnedSegment::bytes().len()` is **strictly** less than this
492/// value are eligible to merge. spg-server reads
493/// `SPG_COMPACTION_TARGET_SEGMENT_BYTES` to override.
494pub const COMPACTION_TARGET_DEFAULT_BYTES: u64 = 4 * 1024 * 1024;
495
496/// Per-slot transaction state. Held inside `tx_catalogs[tx_id]` for the
497/// lifetime of a BEGIN..COMMIT (or BEGIN..ROLLBACK) window. Drops when
498/// the TX commits (its `catalog` is moved over `Engine.catalog`) or
499/// rolls back (slot removed, catalog discarded).
500#[derive(Debug, Default, Clone)]
501struct TxState {
502    /// The TX's shadow copy of the catalog. Started as a clone of
503    /// `Engine.catalog` at BEGIN time; writes flow into it; COMMIT
504    /// installs it over `Engine.catalog`. `Catalog::clone()` is O(1)
505    /// since v4.40 (`PersistentVec` rows + `PersistentBTreeMap` indices).
506    catalog: Catalog,
507    /// Per-TX savepoint stack. Each entry pairs the savepoint name with
508    /// a clone of `catalog` at the moment `SAVEPOINT <name>` fired.
509    /// `ROLLBACK TO <name>` restores from the entry and pops everything
510    /// after it; `RELEASE <name>` discards the entry and everything
511    /// after; COMMIT/ROLLBACK clears the whole stack.
512    savepoints: Vec<(String, Catalog)>,
513}
514
/// v7.11.0 — frozen read-only view of the engine's committed state.
/// Constructed via [`Engine::clone_snapshot`]. Holds clones of the
/// catalog, statistics, clock function, and row-cap config — the
/// four fields the `execute_readonly` path actually reads. Cheap to
/// `Clone` (each clone shares the underlying `PersistentVec` row
/// storage; only the trie root pointers copy). Send + Sync so a
/// snapshot can be moved across `tokio::task::spawn_blocking`
/// boundaries without coordination.
///
/// The contract: a snapshot reflects the engine's state at the
/// moment `clone_snapshot()` returned. Subsequent writes to the
/// engine are NOT visible. Callers who need fresher data take a
/// new snapshot.
#[derive(Debug, Clone)]
pub struct CatalogSnapshot {
    // Clone of the catalog the engine exposed when the snapshot was taken.
    catalog: Catalog,
    // Clone of the engine's per-column statistics at snapshot time.
    statistics: statistics::Statistics,
    // Copy of the engine's optional clock callback.
    clock: Option<ClockFn>,
    // Copy of the engine's per-query row cap (`None` = unlimited).
    max_query_rows: Option<usize>,
}
535
536#[derive(Debug, Default)]
537pub struct Engine {
538    /// Committed catalog — what survives `Engine::snapshot()` and what
539    /// outside-TX `SELECT`s read.
540    catalog: Catalog,
541    /// Active TX slots, keyed by `TxId`. Empty when no TX is in flight.
542    /// v4.41.1 runtime invariant: at most one entry (single-writer
543    /// model unchanged). v4.42 will let dispatch hold multiple entries
544    /// concurrently for group commit + engine MVCC.
545    tx_catalogs: BTreeMap<TxId, TxState>,
546    /// Which slot the next exec_* call should mutate. Set by
547    /// `execute_in(sql, tx_id)` at the entry point; legacy `execute(sql)`
548    /// sets it to `IMPLICIT_TX`. None when no TX is in flight (read /
549    /// write goes straight against `catalog`).
550    current_tx: Option<TxId>,
551    /// Monotonic counter for `alloc_tx_id`. Starts at 1 — slot 0 is
552    /// reserved for `IMPLICIT_TX`.
553    next_tx_id: u64,
554    /// Optional wall clock used to satisfy `NOW()` / `CURRENT_TIMESTAMP`
555    /// / `CURRENT_DATE`. Set by the host environment.
556    clock: Option<ClockFn>,
557    /// v4.1 cryptographic RNG for per-user password salt. Set by the
558    /// host. `None` means SQL-driven `CREATE USER` uses a
559    /// deterministic fallback — see `SaltFn`.
560    salt_fn: Option<SaltFn>,
561    /// v4.2 per-query row cap. `None` = unlimited. When set, a
562    /// SELECT that materialises more than `n` rows returns
563    /// `EngineError::RowLimitExceeded`. Enforced before the result
564    /// is shaped into wire frames so a runaway scan can't blow the
565    /// server's heap.
566    max_query_rows: Option<usize>,
567    /// v4.1 RBAC user table. Empty means "no RBAC configured yet" —
568    /// the server decides what that means at the auth boundary
569    /// (open mode vs legacy single-password mode). User CRUD goes
570    /// through `create_user`/`drop_user`/`verify_user`; persistence
571    /// rides the snapshot envelope alongside the catalog.
572    users: UserStore,
573    /// v6.1.2 logical-replication publication catalog. Empty until
574    /// `CREATE PUBLICATION` runs. Persistence rides the v3 envelope
575    /// trailer (see `build_envelope`).
576    publications: publications::Publications,
577    /// v6.1.4 logical-replication subscription catalog. Empty until
578    /// `CREATE SUBSCRIPTION` runs. Persistence rides the v4 envelope
579    /// trailer.
580    subscriptions: subscriptions::Subscriptions,
581    /// v6.2.0 — per-column statistics for the cost-based optimizer.
582    /// Populated by `ANALYZE`; queried via `spg_statistic` virtual
583    /// table. Persistence rides the v5 envelope trailer.
584    statistics: statistics::Statistics,
585    /// v6.3.0 — engine-level plan cache. Caches the post-`prepare()`
586    /// `Statement` keyed on SQL text. In-memory only — does NOT ride
587    /// the snapshot envelope (rebuilt on demand after restart).
588    plan_cache: plan_cache::PlanCache,
589    /// v6.5.1 — per-distinct-SQL execution stats. In-memory only,
590    /// surfaced via `spg_stat_query` virtual table. Updated by the
591    /// `execute_*` paths after a successful execute.
592    query_stats: query_stats::QueryStats,
593    /// v6.5.2 — connection-state provider callback. spg-server
594    /// registers a function at startup that snapshots its
595    /// per-pgwire-connection registry into `ActivityRow`s; engine
596    /// reads through it on every `SELECT * FROM spg_stat_activity`.
597    /// `None` ⇒ no-data (returns empty rows; matches the no_std
598    /// embedded callers that don't run pgwire).
599    activity_provider: Option<ActivityProvider>,
600    /// v6.5.3 — audit-chain provider + verifier. Same pattern as
601    /// activity_provider: spg-server registers both at startup;
602    /// engine reads through on `SELECT * FROM spg_audit_chain` and
603    /// `SELECT * FROM spg_audit_verify`. `None` ⇒ no-data.
604    audit_chain_provider: Option<AuditChainProvider>,
605    audit_verifier: Option<AuditVerifier>,
606    /// v6.5.6 — slow-query log threshold in microseconds. When set,
607    /// every successful execute whose elapsed exceeds the threshold
608    /// gets fed to the registered slow-query log callback (so
609    /// spg-server can emit a structured log line). Default `None`
610    /// = no slow-query logging.
611    slow_query_threshold_us: Option<u64>,
612    slow_query_logger: Option<SlowQueryLogger>,
613}
614
615/// v6.5.6 — callback signature for slow-query log emission. Called
616/// with `(sql, elapsed_us)` once per successful execute that crosses
617/// the threshold.
618pub type SlowQueryLogger = fn(&str, u64);
619
620/// v6.5.4 — synthesise a `CREATE TABLE` statement from catalog
621/// state. Round-trips through `Engine::execute` to recreate the
622/// same schema (sans data + indexes — indexes are emitted as a
623/// separate `CREATE INDEX` chain in `spg_database_ddl`).
624fn render_create_table(name: &str, columns: &[ColumnSchema]) -> String {
625    let mut out = alloc::format!("CREATE TABLE {name} (");
626    for (i, col) in columns.iter().enumerate() {
627        if i > 0 {
628            out.push_str(", ");
629        }
630        out.push_str(&col.name);
631        out.push(' ');
632        out.push_str(&render_data_type(col.ty));
633        if !col.nullable {
634            out.push_str(" NOT NULL");
635        }
636        if col.auto_increment {
637            out.push_str(" AUTO_INCREMENT");
638        }
639    }
640    out.push(')');
641    out
642}
643
644fn render_data_type(ty: DataType) -> String {
645    match ty {
646        DataType::SmallInt => "SMALLINT".into(),
647        DataType::Int => "INT".into(),
648        DataType::BigInt => "BIGINT".into(),
649        DataType::Float => "FLOAT".into(),
650        DataType::Text => "TEXT".into(),
651        DataType::Varchar(n) => alloc::format!("VARCHAR({n})"),
652        DataType::Char(n) => alloc::format!("CHAR({n})"),
653        DataType::Bool => "BOOL".into(),
654        DataType::Vector { dim, encoding } => match encoding {
655            spg_storage::VecEncoding::F32 => alloc::format!("VECTOR({dim})"),
656            spg_storage::VecEncoding::Sq8 => alloc::format!("VECTOR({dim}) USING SQ8"),
657            spg_storage::VecEncoding::F16 => alloc::format!("VECTOR({dim}) USING HALF"),
658        },
659        DataType::Numeric { precision, scale } => {
660            alloc::format!("NUMERIC({precision},{scale})")
661        }
662        DataType::Date => "DATE".into(),
663        DataType::Timestamp => "TIMESTAMP".into(),
664        DataType::Interval => "INTERVAL".into(),
665        DataType::Json => "JSON".into(),
666        DataType::Jsonb => "JSONB".into(),
667        DataType::Timestamptz => "TIMESTAMPTZ".into(),
668        DataType::Bytes => "BYTEA".into(),
669        DataType::TextArray => "TEXT[]".into(),
670        DataType::IntArray => "INT[]".into(),
671        DataType::BigIntArray => "BIGINT[]".into(),
672    }
673}
674
/// v6.5.2 — one row of `spg_stat_activity`. Engine-public so
/// spg-server can construct rows without re-exporting internal
/// dispatch types.
#[derive(Debug, Clone)]
pub struct ActivityRow {
    // NOTE(review): presumably the per-connection identifier assigned by
    // spg-server — confirm against the provider.
    pub pid: u32,
    pub user: String,
    // NOTE(review): `_us` suffix suggests microseconds (same unit as
    // `ClockFn`) — confirm.
    pub started_at_us: i64,
    pub current_sql: String,
    pub wait_event: String,
    // NOTE(review): presumably elapsed time in microseconds — confirm.
    pub elapsed_us: i64,
    pub in_transaction: bool,
}
688
689/// v6.5.2 — provider callback type. Fresh snapshot returned each
690/// call; engine doesn't cache the slice.
691pub type ActivityProvider = fn() -> Vec<ActivityRow>;
692
/// v6.5.3 — one row of `spg_audit_chain`. Engine-public so
/// spg-server can construct rows directly from `AuditEntry`.
#[derive(Debug, Clone)]
pub struct AuditRow {
    pub seq: i64,
    // NOTE(review): `_ms` suffix suggests a millisecond timestamp — confirm
    // the epoch against spg-server's `AuditEntry`.
    pub ts_ms: i64,
    // NOTE(review): presumably hex-encoded hashes linking this entry to its
    // predecessor — confirm encoding and hash function with the provider.
    pub prev_hash_hex: String,
    pub entry_hash_hex: String,
    pub sql: String,
}
703
/// v6.5.3 — chain-table provider. spg-server registers a fn pointer
/// that snapshots the audit log into `AuditRow`s.
pub type AuditChainProvider = fn() -> Vec<AuditRow>;
/// v6.5.3 — chain verifier. spg-server registers a fn pointer that
/// verifies the audit log. It returns `(verified_count, broken_at_seq)`
/// — `broken_at_seq` is `-1` on a clean chain.
pub type AuditVerifier = fn() -> (i64, i64);
710
711impl Engine {
712    pub fn new() -> Self {
713        Self {
714            catalog: Catalog::new(),
715            tx_catalogs: BTreeMap::new(),
716            current_tx: None,
717            next_tx_id: 1,
718            clock: None,
719            salt_fn: None,
720            max_query_rows: None,
721            users: UserStore::new(),
722            publications: publications::Publications::new(),
723            subscriptions: subscriptions::Subscriptions::new(),
724            statistics: statistics::Statistics::new(),
725            plan_cache: plan_cache::PlanCache::new(),
726            query_stats: query_stats::QueryStats::new(),
727            activity_provider: None,
728            audit_chain_provider: None,
729            audit_verifier: None,
730            slow_query_threshold_us: None,
731            slow_query_logger: None,
732        }
733    }
734
735    /// v7.11.0 — clone the engine's committed catalog + read-time
736    /// state into a frozen `CatalogSnapshot`. Cheap (`Catalog` is
737    /// backed by `PersistentVec`; cloning is O(log n) per table).
738    /// Subsequent writes to this engine are invisible to the
739    /// snapshot; the snapshot is self-contained and can be moved
740    /// to another thread for concurrent `execute_readonly_on_snapshot`
741    /// calls. The basis for [`AsyncReadHandle`] in spg-embedded-tokio
742    /// and any other read-fanout pattern.
743    #[must_use]
744    pub fn clone_snapshot(&self) -> CatalogSnapshot {
745        CatalogSnapshot {
746            catalog: self.active_catalog().clone(),
747            statistics: self.statistics.clone(),
748            clock: self.clock,
749            max_query_rows: self.max_query_rows,
750        }
751    }
752
753    /// v7.11.1 — execute a read-only SQL statement against a
754    /// `CatalogSnapshot` without touching this engine. Same
755    /// semantics as `execute_readonly` but parameterised on the
756    /// snapshot's catalog. Reject DDL/DML the same way
757    /// `execute_readonly` does. Static-on-Self so the caller can
758    /// dispatch without holding an `Engine` borrow alongside the
759    /// snapshot.
760    pub fn execute_readonly_on_snapshot(
761        snapshot: &CatalogSnapshot,
762        sql: &str,
763    ) -> Result<QueryResult, EngineError> {
764        Self::execute_readonly_on_snapshot_with_cancel(snapshot, sql, CancelToken::none())
765    }
766
767    /// v7.11.1 — `execute_readonly_on_snapshot` with cooperative
768    /// cancellation. Builds a transient `Engine` over the snapshot
769    /// state, runs `execute_readonly_with_cancel`, drops. The
770    /// transient engine is cheap to construct (no I/O; everything
771    /// is just struct moves) and lets the existing read path stay
772    /// untouched.
773    pub fn execute_readonly_on_snapshot_with_cancel(
774        snapshot: &CatalogSnapshot,
775        sql: &str,
776        cancel: CancelToken<'_>,
777    ) -> Result<QueryResult, EngineError> {
778        let transient = Engine {
779            catalog: snapshot.catalog.clone(),
780            statistics: snapshot.statistics.clone(),
781            clock: snapshot.clock,
782            max_query_rows: snapshot.max_query_rows,
783            ..Engine::default()
784        };
785        transient.execute_readonly_with_cancel(sql, cancel)
786    }
787
788    /// Construct an engine restored from a previously-snapshotted catalog
789    /// (see `snapshot()`).
790    pub fn restore(catalog: Catalog) -> Self {
791        Self {
792            catalog,
793            tx_catalogs: BTreeMap::new(),
794            current_tx: None,
795            next_tx_id: 1,
796            clock: None,
797            salt_fn: None,
798            max_query_rows: None,
799            users: UserStore::new(),
800            publications: publications::Publications::new(),
801            subscriptions: subscriptions::Subscriptions::new(),
802            statistics: statistics::Statistics::new(),
803            plan_cache: plan_cache::PlanCache::new(),
804            query_stats: query_stats::QueryStats::new(),
805            activity_provider: None,
806            audit_chain_provider: None,
807            audit_verifier: None,
808            slow_query_threshold_us: None,
809            slow_query_logger: None,
810        }
811    }
812
813    /// Restore an engine + user table from a v4.1 envelope produced
814    /// by `snapshot_with_users()`. Falls back to plain catalog-only
815    /// restore if the envelope magic isn't present (so v3.x snapshot
816    /// files still load). v6.1.2 adds the optional publications
817    /// trailer (envelope v3); a v1/v2 envelope deserialises to an
818    /// empty publication table.
819    pub fn restore_envelope(buf: &[u8]) -> Result<Self, EngineError> {
820        match split_envelope(buf) {
821            EnvelopeParse::Pair {
822                catalog: catalog_bytes,
823                users: user_bytes,
824                publications: pub_bytes,
825                subscriptions: sub_bytes,
826                statistics: stats_bytes,
827            } => {
828                let catalog = Catalog::deserialize(catalog_bytes).map_err(EngineError::Storage)?;
829                let users = users::deserialize_users(user_bytes)
830                    .map_err(|e| EngineError::Unsupported(alloc::format!("users restore: {e}")))?;
831                let publications = match pub_bytes {
832                    Some(b) => publications::Publications::deserialize(b).map_err(|e| {
833                        EngineError::Unsupported(alloc::format!("publications restore: {e:?}"))
834                    })?,
835                    None => publications::Publications::new(),
836                };
837                let subscriptions = match sub_bytes {
838                    Some(b) => subscriptions::Subscriptions::deserialize(b).map_err(|e| {
839                        EngineError::Unsupported(alloc::format!("subscriptions restore: {e:?}"))
840                    })?,
841                    None => subscriptions::Subscriptions::new(),
842                };
843                let statistics = match stats_bytes {
844                    Some(b) => statistics::Statistics::deserialize(b).map_err(|e| {
845                        EngineError::Unsupported(alloc::format!("statistics restore: {e:?}"))
846                    })?,
847                    None => statistics::Statistics::new(),
848                };
849                Ok(Self {
850                    catalog,
851                    tx_catalogs: BTreeMap::new(),
852                    current_tx: None,
853                    next_tx_id: 1,
854                    clock: None,
855                    salt_fn: None,
856                    max_query_rows: None,
857                    users,
858                    publications,
859                    subscriptions,
860                    statistics,
861                    plan_cache: plan_cache::PlanCache::new(),
862                    query_stats: query_stats::QueryStats::new(),
863                    activity_provider: None,
864                    audit_chain_provider: None,
865                    audit_verifier: None,
866                    slow_query_threshold_us: None,
867                    slow_query_logger: None,
868                })
869            }
870            EnvelopeParse::CrcMismatch { expected, computed } => {
871                Err(EngineError::Storage(StorageError::Corrupt(alloc::format!(
872                    "snapshot envelope CRC32 mismatch (expected={expected:#010x}, computed={computed:#010x})"
873                ))))
874            }
875            EnvelopeParse::Bare => {
876                let catalog = Catalog::deserialize(buf).map_err(EngineError::Storage)?;
877                Ok(Self::restore(catalog))
878            }
879        }
880    }
881
    /// Shared borrow of the engine's user store.
    pub const fn users(&self) -> &UserStore {
        &self.users
    }
885
886    /// `salt` is supplied by the caller (the host has a random
887    /// source; the engine is `no_std`). Caller should pass a fresh
888    /// 16-byte random value per user.
889    pub fn create_user(
890        &mut self,
891        name: &str,
892        password: &str,
893        role: Role,
894        salt: [u8; 16],
895    ) -> Result<(), UserError> {
896        self.users.create(name, password, role, salt)?;
897        // v4.8: also derive SCRAM-SHA-256 secrets so PG-wire SASL
898        // auth can verify without re-running PBKDF2 per attempt.
899        // Uses a fresh salt from the host RNG (falls back to a
900        // deterministic per-username salt when no RNG is wired, same
901        // as the legacy hash path).
902        let scram_salt = self.salt_fn.map_or_else(
903            || {
904                let mut s = [0u8; users::SCRAM_SALT_LEN];
905                let digest = spg_crypto::hash(name.as_bytes());
906                // Use bytes 16..32 of BLAKE3 so we don't reuse the
907                // exact same fallback salt as the BLAKE3 hash path.
908                s.copy_from_slice(&digest[16..32]);
909                s
910            },
911            |f| f(),
912        );
913        self.users
914            .enable_scram(name, password, scram_salt, users::SCRAM_DEFAULT_ITERS)?;
915        Ok(())
916    }
917
    /// Remove the user called `name`. Thin wrapper over
    /// `UserStore::drop`; its `UserError` is returned unchanged.
    pub fn drop_user(&mut self, name: &str) -> Result<(), UserError> {
        self.users.drop(name)
    }
921
    /// Check `password` for the user `name` by delegating to
    /// `UserStore::verify` and returning its `Option<Role>` as-is.
    /// NOTE(review): presumably `Some(role)` means the credentials
    /// matched — confirm against `UserStore::verify`.
    pub fn verify_user(&self, name: &str, password: &str) -> Option<Role> {
        self.users.verify(name, password)
    }
925
    /// Builder: attach a wall clock so `NOW()` / `CURRENT_TIMESTAMP` /
    /// `CURRENT_DATE` evaluate to a real value instead of erroring out.
    /// Consumes the engine and returns it, so builder calls chain.
    #[must_use]
    pub const fn with_clock(mut self, clock: ClockFn) -> Self {
        // Replaces any clock attached earlier.
        self.clock = Some(clock);
        self
    }
933
    /// Builder: attach an OS-backed RNG for per-user password salts.
    /// The host (`spg-server`) typically wires this to `/dev/urandom`.
    /// `create_user` calls it to obtain the SCRAM salt and falls back
    /// to a name-derived salt when none is attached.
    #[must_use]
    pub const fn with_salt_fn(mut self, f: SaltFn) -> Self {
        // Replaces any salt source attached earlier.
        self.salt_fn = Some(f);
        self
    }
941
    /// Builder: cap the number of rows a single statement may return.
    /// Exceeding the cap raises `EngineError::RowLimitExceeded`.
    /// In this file the cap is applied by `enforce_row_limit`, which
    /// inspects the result after the executor has produced it.
    /// NOTE(review): whether the executor also enforces the bound
    /// while scanning is not visible here — confirm in `exec_select`.
    #[must_use]
    pub const fn with_max_query_rows(mut self, n: usize) -> Self {
        // Replaces any cap configured earlier.
        self.max_query_rows = Some(n);
        self
    }
952
    /// The *committed* catalog. Note: during a transaction this returns the
    /// pre-TX state — `SELECT` inside a TX goes through `execute()` and reads
    /// the shadow. Tests that inspect outside-TX state should use this.
    /// Unlike `active_catalog`, this never consults `current_tx`.
    pub const fn catalog(&self) -> &Catalog {
        &self.catalog
    }
959
960    /// Serialize the *committed* catalog to bytes. v0.6 was full-snapshot; v0.9
961    /// adds the rule that an open TX's shadow is never snapshotted — only the
962    /// post-COMMIT state is persisted. v4.1 wraps the catalog in an envelope
963    /// when there are users to persist; an empty user table snapshots as the
964    /// bare catalog format (backwards-compat with v3.x readers). v6.1.2
965    /// adds publications to the envelope condition: either non-empty
966    /// users OR non-empty publications now triggers the envelope path.
967    pub fn snapshot(&self) -> Vec<u8> {
968        if self.users.is_empty()
969            && self.publications.is_empty()
970            && self.subscriptions.is_empty()
971            && self.statistics.is_empty()
972        {
973            self.catalog.serialize()
974        } else {
975            build_envelope(
976                &self.catalog.serialize(),
977                &users::serialize_users(&self.users),
978                &self.publications.serialize(),
979                &self.subscriptions.serialize(),
980                &self.statistics.serialize(),
981            )
982        }
983    }
984
    /// True when at least one TX slot is in flight. v4.41.1 runtime
    /// invariant: at most one slot active at a time (dispatch holds
    /// `engine.write()` across the entire wrap). v4.42 will let this
    /// return true with multiple slots concurrently.
    pub fn in_transaction(&self) -> bool {
        // "In flight" here means: `tx_catalogs` holds at least one slot.
        !self.tx_catalogs.is_empty()
    }
992
993    /// v4.41.1 allocate a fresh TX handle. Used by spg-server dispatch
994    /// to scope each implicit-wrap BEGIN..stmt..COMMIT to its own slot
995    /// in `tx_catalogs`. v4.42 — the commit-barrier leader allocates
996    /// one of these per task in its group, runs `BEGIN`+sql+`COMMIT`
997    /// sequentially under a single `engine.write()` so each task's
998    /// mutations accumulate into shared state, then either keeps the
999    /// accumulated state (fsync OK) or restores the pre-image via
1000    /// `replace_catalog` (fsync err).
1001    pub fn alloc_tx_id(&mut self) -> TxId {
1002        let id = TxId(self.next_tx_id);
1003        self.next_tx_id = self.next_tx_id.saturating_add(1);
1004        id
1005    }
1006
    /// v4.42 — atomically replace the live catalog. Used by the
    /// commit-barrier leader to roll back a group whose batched
    /// fsync failed: the leader snapshots `engine.catalog().clone()`
    /// (O(1) Arc bump after the v4.39/v4.40 persistent migration)
    /// at group start, sequentially applies each task's BEGIN+sql+
    /// COMMIT under the same write lock to accumulate mutations
    /// into shared state, batches the WAL bytes, fsyncs once, and
    /// on failure calls this with the pre-image to undo every
    /// task in the group at once.
    ///
    /// **Does NOT touch `tx_catalogs` / `current_tx`.** Any
    /// explicit-TX slot from a concurrent client (created via the
    /// legacy `IMPLICIT_TX`-less dispatch path or via the future
    /// MVCC-readers v5+ work) has its own snapshot baked into the
    /// slot — restoring `self.catalog` to the pre-image leaves
    /// those slots untouched, exactly as they were when the leader
    /// took the lock. The leader's own implicit-TX slots are all
    /// already discarded (`exec_commit` removed them as each
    /// task's COMMIT ran) by the time this is reached.
    pub fn replace_catalog(&mut self, catalog: Catalog) {
        // Plain assignment to the committed catalog; the previous
        // value is dropped and no other field is modified.
        self.catalog = catalog;
    }
1029
1030    /// v6.7.0 — public shim around `Catalog::freeze_oldest_to_cold`
1031    /// so tests + the spg-server freezer can drive a freeze without
1032    /// reaching into the private `active_catalog_mut`. v6.7.4
1033    /// parallel freezer will build on this surface.
1034    ///
1035    /// Marks the table's cached `cold_row_count` stale because the
1036    /// freeze added cold locators that ANALYZE hasn't yet refreshed.
1037    pub fn freeze_oldest_to_cold(
1038        &mut self,
1039        table_name: &str,
1040        index_name: &str,
1041        max_rows: usize,
1042    ) -> Result<spg_storage::FreezeReport, EngineError> {
1043        let report = self
1044            .active_catalog_mut()
1045            .freeze_oldest_to_cold(table_name, index_name, max_rows)
1046            .map_err(EngineError::Storage)?;
1047        if let Some(t) = self.active_catalog_mut().get_mut(table_name) {
1048            t.mark_cold_row_count_stale();
1049        }
1050        Ok(report)
1051    }
1052
1053    /// v6.7.5 — public shim used by the spg-server follower's
1054    /// segment-forwarding receiver. Registers a cold-tier segment
1055    /// at a specific id (the master's id, as transmitted on the
1056    /// wire) so the follower's BTree-Cold locators stay byte-
1057    /// identical with the master's. Wraps
1058    /// `Catalog::load_segment_bytes_at` under the standard
1059    /// clone-mutate-replace pattern.
1060    ///
1061    /// Returns `Ok(())` on success **and** on the "slot already
1062    /// occupied" case — a follower mid-reconnect may receive a
1063    /// segment chunk for a segment_id it already has on disk
1064    /// (forwarded last session); the caller should treat that
1065    /// path as a no-op rather than a fatal error.
1066    pub fn receive_cold_segment(
1067        &mut self,
1068        segment_id: u32,
1069        bytes: Vec<u8>,
1070    ) -> Result<(), EngineError> {
1071        let mut new_cat = self.catalog.clone();
1072        match new_cat.load_segment_bytes_at(segment_id, bytes) {
1073            Ok(()) => {
1074                self.replace_catalog(new_cat);
1075                Ok(())
1076            }
1077            Err(StorageError::Corrupt(msg)) if msg.contains("already occupied") => Ok(()),
1078            Err(e) => Err(EngineError::Storage(e)),
1079        }
1080    }
1081
1082    /// v6.7.3 — public shim around `Catalog::compact_cold_segments`
1083    /// driving every BTree index on every user table. Returns one
1084    /// `(table, index, report)` triple for each merge that
1085    /// actually happened (no-op (table, index) pairs are filtered
1086    /// out so callers can size persist-side work to the live
1087    /// merges). Caller is responsible for persisting each
1088    /// `report.merged_segment_bytes` and updating the on-disk
1089    /// segment registry; engine layer is no_std and never
1090    /// touches disk.
1091    ///
1092    /// Marks every touched table's cached `cold_row_count` stale
1093    /// — compaction GC'd some shadowed rows, so the count must be
1094    /// re-derived on the next ANALYZE.
1095    pub fn compact_cold_segments_with_target(
1096        &mut self,
1097        target_segment_bytes: u64,
1098    ) -> Result<Vec<(String, String, CompactReport)>, EngineError> {
1099        let table_names = self.active_catalog().table_names();
1100        let mut reports: Vec<(String, String, CompactReport)> = Vec::new();
1101        for tname in table_names {
1102            if is_internal_table_name(&tname) {
1103                continue;
1104            }
1105            let idx_names: Vec<String> = {
1106                let Some(t) = self.active_catalog().get(&tname) else {
1107                    continue;
1108                };
1109                t.indices()
1110                    .iter()
1111                    .filter(|i| matches!(i.kind, IndexKind::BTree(_)))
1112                    .map(|i| i.name.clone())
1113                    .collect()
1114            };
1115            for iname in idx_names {
1116                let report = self
1117                    .active_catalog_mut()
1118                    .compact_cold_segments(&tname, &iname, target_segment_bytes)
1119                    .map_err(EngineError::Storage)?;
1120                if report.merged_segment_id.is_some() {
1121                    if let Some(t) = self.active_catalog_mut().get_mut(&tname) {
1122                        t.mark_cold_row_count_stale();
1123                    }
1124                    reports.push((tname.clone(), iname, report));
1125                }
1126            }
1127        }
1128        Ok(reports)
1129    }
1130
1131    fn active_catalog(&self) -> &Catalog {
1132        match self.current_tx {
1133            Some(t) => self
1134                .tx_catalogs
1135                .get(&t)
1136                .map_or(&self.catalog, |s| &s.catalog),
1137            None => &self.catalog,
1138        }
1139    }
1140
1141    fn active_catalog_mut(&mut self) -> &mut Catalog {
1142        let tx = self.current_tx;
1143        match tx {
1144            Some(t) => match self.tx_catalogs.get_mut(&t) {
1145                Some(s) => &mut s.catalog,
1146                None => &mut self.catalog,
1147            },
1148            None => &mut self.catalog,
1149        }
1150    }
1151
1152    /// Read-only execute path. Succeeds for `SELECT` / `SHOW TABLES`
1153    /// / `SHOW COLUMNS`; returns `EngineError::WriteRequired` for
1154    /// every other statement, so the caller can fall through to the
1155    /// `&mut self` `execute` path under a write lock. Engine state is
1156    /// not mutated even on the success path (`rewrite_clock_calls`
1157    /// and `resolve_order_by_position` both mutate the locally-owned
1158    /// AST, not `self`).
1159    ///
1160    /// **v4.0 concurrency**: this is the entry point the server takes
1161    /// under an `RwLock::read()` so multiple `SELECT` clients run in
1162    /// parallel without serialising on a single mutex.
1163    pub fn execute_readonly(&self, sql: &str) -> Result<QueryResult, EngineError> {
1164        self.execute_readonly_with_cancel(sql, CancelToken::none())
1165    }
1166
1167    /// v4.5 — read path with cooperative cancellation. Token's
1168    /// `is_cancelled` is checked at the start (so a watchdog that
1169    /// already fired returns Cancelled immediately) and at row-loop
1170    /// checkpoints inside `exec_select`. SHOW paths are O(small) and
1171    /// don't bother checking.
1172    pub fn execute_readonly_with_cancel(
1173        &self,
1174        sql: &str,
1175        cancel: CancelToken<'_>,
1176    ) -> Result<QueryResult, EngineError> {
1177        cancel.check()?;
1178        let mut stmt = parser::parse_statement(sql)?;
1179        let now_micros = self.clock.map(|f| f());
1180        rewrite_clock_calls(&mut stmt, now_micros);
1181        if let Statement::Select(s) = &mut stmt {
1182            resolve_order_by_position(s);
1183            // v6.2.3 — cost-based JOIN reorder (read path).
1184            reorder::reorder_joins(s, &self.catalog, &self.statistics);
1185        }
1186        let result = match stmt {
1187            Statement::Select(s) => self.exec_select_cancel(&s, cancel),
1188            Statement::ShowTables => Ok(self.exec_show_tables()),
1189            Statement::ShowColumns(table) => self.exec_show_columns(&table),
1190            Statement::ShowUsers => Ok(self.exec_show_users()),
1191            Statement::ShowPublications => Ok(self.exec_show_publications()),
1192            Statement::ShowSubscriptions => Ok(self.exec_show_subscriptions()),
1193            Statement::WaitForWalPosition { .. } => Err(EngineError::Unsupported(
1194                "WAIT FOR WAL POSITION must be handled by the server layer".into(),
1195            )),
1196            Statement::Explain(e) => self.exec_explain(&e, cancel),
1197            _ => Err(EngineError::WriteRequired),
1198        };
1199        self.enforce_row_limit(result)
1200    }
1201
1202    /// v4.2: cap result-set size. Applied after the executor
1203    /// materialises rows but before they leave the engine — wrapping
1204    /// every Rows-returning exec_* function would scatter the check.
1205    fn enforce_row_limit(
1206        &self,
1207        result: Result<QueryResult, EngineError>,
1208    ) -> Result<QueryResult, EngineError> {
1209        if let (Ok(QueryResult::Rows { rows, .. }), Some(cap)) = (&result, self.max_query_rows)
1210            && rows.len() > cap
1211        {
1212            return Err(EngineError::RowLimitExceeded(cap));
1213        }
1214        result
1215    }
1216
1217    pub fn execute(&mut self, sql: &str) -> Result<QueryResult, EngineError> {
1218        self.execute_in_with_cancel(sql, IMPLICIT_TX, CancelToken::none())
1219    }
1220
    /// v4.5 — write path with cooperative cancellation. Same dispatch
    /// as `execute_in_with_cancel(sql, IMPLICIT_TX, cancel)`. Kept as
    /// a separate entry point for backward-compat with the v4.5
    /// public API.
    pub fn execute_with_cancel(
        &mut self,
        sql: &str,
        cancel: CancelToken<'_>,
    ) -> Result<QueryResult, EngineError> {
        // Pure delegation: only the TX scope is filled in here.
        self.execute_in_with_cancel(sql, IMPLICIT_TX, cancel)
    }
1232
1233    /// v4.41.1 multi-slot write entry. Routes `sql` through the TX
1234    /// slot identified by `tx_id` so spg-server dispatch can scope
1235    /// each implicit-wrap BEGIN..stmt..COMMIT to its own slot in
1236    /// `tx_catalogs`. `IMPLICIT_TX` is the legacy single-slot path
1237    /// every other caller (engine self-tests, replay, spg-embedded)
1238    /// implicitly takes via `execute()` / `execute_with_cancel()`.
1239    pub fn execute_in(&mut self, sql: &str, tx_id: TxId) -> Result<QueryResult, EngineError> {
1240        self.execute_in_with_cancel(sql, tx_id, CancelToken::none())
1241    }
1242
1243    /// v4.41.1 write path with cooperative cancellation + explicit TX
1244    /// scope. Sets `self.current_tx` for the duration of the call so
1245    /// every `exec_*` helper transparently sees its TX's shadow
1246    /// catalog and savepoint stack; restores on exit so the field is
1247    /// only valid mid-call (no leakage across calls).
1248    pub fn execute_in_with_cancel(
1249        &mut self,
1250        sql: &str,
1251        tx_id: TxId,
1252        cancel: CancelToken<'_>,
1253    ) -> Result<QueryResult, EngineError> {
1254        let saved = self.current_tx;
1255        self.current_tx = Some(tx_id);
1256        let result = self.execute_inner_with_cancel(sql, cancel);
1257        self.current_tx = saved;
1258        result
1259    }
1260
1261    /// v6.1.1 — parse and pre-process a SQL string ONCE so the
1262    /// resulting [`Statement`] can be cached and re-executed via
1263    /// [`Engine::execute_prepared`]. Returns the same `Statement`
1264    /// the simple-query path would synthesise internally (clock
1265    /// rewrites + ORDER BY position-ref resolution applied at
1266    /// prepare time, since both are session-independent). The
1267    /// `$N` placeholders in the SQL stay as `Expr::Placeholder(n)`
1268    /// nodes; they're resolved to concrete values per-call by
1269    /// `execute_prepared`'s substitution walk.
1270    ///
1271    /// Pgwire's `Parse` (P) message lands here.
1272    pub fn prepare(&self, sql: &str) -> Result<Statement, ParseError> {
1273        let mut stmt = parser::parse_statement(sql)?;
1274        let now_micros = self.clock.map(|f| f());
1275        rewrite_clock_calls(&mut stmt, now_micros);
1276        if let Statement::Select(s) = &mut stmt {
1277            // v6.4.1 — expand `GROUP BY ALL` to every non-aggregate
1278            // SELECT-list item BEFORE position / alias resolution so
1279            // downstream passes see the explicit list.
1280            expand_group_by_all(s);
1281            resolve_order_by_position(s);
1282            // v6.2.3 — cost-based JOIN reorder. No-op for
1283            // single-table FROMs or any non-INNER join shape.
1284            reorder::reorder_joins(s, &self.catalog, &self.statistics);
1285        }
1286        Ok(stmt)
1287    }
1288
1289    /// v6.3.0 — cached prepare. Returns a cloned `Statement` from
1290    /// the plan cache on hit, runs the full `prepare()` path on miss
1291    /// and inserts the resulting plan before returning. Skipping the
1292    /// parse + JOIN-reorder pipeline on hit is the dominant win for
1293    /// JDBC / sqlx / pgx clients that reuse the same SQL string.
1294    ///
1295    /// Returns a cloned `Statement` (not a borrow) because the
1296    /// pgwire layer owns its `PreparedStmt` map per-session and the
1297    /// engine-level cache must stay available for other sessions.
1298    /// Clone cost on a 5-table JOIN AST is well under the parse cost
1299    /// it replaces.
1300    pub fn prepare_cached(&mut self, sql: &str) -> Result<Statement, ParseError> {
1301        // v6.3.1 — version-aware lookup. If the cached plan was
1302        // prepared before the most recent ANALYZE, evict and replan.
1303        let current_version = self.statistics.version();
1304        if let Some(plan) = self.plan_cache.get(sql) {
1305            if plan.statistics_version == current_version {
1306                return Ok(plan.stmt.clone());
1307            }
1308            // Stale entry — fall through to evict + re-prepare.
1309        }
1310        self.plan_cache.evict(sql);
1311        let stmt = self.prepare(sql)?;
1312        let source_tables = plan_cache::collect_source_tables(&stmt);
1313        let plan = plan_cache::PreparedPlan {
1314            stmt: stmt.clone(),
1315            statistics_version: current_version,
1316            source_tables,
1317            describe_columns: alloc::vec::Vec::new(),
1318        };
1319        self.plan_cache.insert(String::from(sql), plan);
1320        Ok(stmt)
1321    }
1322
    /// v6.3.0 — read-only accessor for tests and v6.3.1 invalidation.
    /// Returns a shared borrow of the engine-level plan cache.
    pub fn plan_cache(&self) -> &plan_cache::PlanCache {
        &self.plan_cache
    }
1327
    /// v6.3.0 — mutable accessor for v6.3.1 invalidation hooks.
    /// Returns an exclusive borrow of the engine-level plan cache.
    pub fn plan_cache_mut(&mut self) -> &mut plan_cache::PlanCache {
        &mut self.plan_cache
    }
1332
1333    /// v6.3.3 — Describe a prepared `Statement` without executing.
1334    /// Returns `(parameter_oids, output_columns)`. Empty
1335    /// `output_columns` means the statement has no row-producing
1336    /// shape we could resolve here (JOIN, subquery, non-SELECT, …)
1337    /// — pgwire layer maps that to a `NoData` reply.
1338    pub fn describe_prepared(&self, stmt: &Statement) -> (Vec<u32>, Vec<ColumnSchema>) {
1339        describe::describe_prepared(stmt, self.active_catalog())
1340    }
1341
1342    /// v6.1.1 — execute a [`Statement`] previously returned by
1343    /// [`Engine::prepare`], substituting `Expr::Placeholder(n)`
1344    /// nodes for the corresponding [`Value`] in `params` (1-based
1345    /// per PG: `$1` → `params[0]`). Bind-time string parameters
1346    /// are decoded into typed `Value`s by the pgwire layer before
1347    /// this call so the resulting AST hits the same execution
1348    /// path as a simple query — no SQL re-parse.
1349    ///
1350    /// Pgwire's `Execute` (E) message after a `Bind` (B) lands here.
1351    pub fn execute_prepared(
1352        &mut self,
1353        mut stmt: Statement,
1354        params: &[Value],
1355    ) -> Result<QueryResult, EngineError> {
1356        substitute_placeholders(&mut stmt, params)?;
1357        self.execute_stmt_with_cancel(stmt, CancelToken::none())
1358    }
1359
1360    fn execute_inner_with_cancel(
1361        &mut self,
1362        sql: &str,
1363        cancel: CancelToken<'_>,
1364    ) -> Result<QueryResult, EngineError> {
1365        cancel.check()?;
1366        let stmt = self.prepare(sql)?;
1367        // v6.5.1 — wrap the executor with a wall-clock window so we
1368        // can record into spg_stat_query. Skip when the engine has
1369        // no clock attached (no_std embedded callers).
1370        let start_us = self.clock.map(|f| f());
1371        let result = self.execute_stmt_with_cancel(stmt, cancel);
1372        if let (Some(t0), Ok(_)) = (start_us, &result) {
1373            let now = self.clock.map_or(t0, |f| f());
1374            let elapsed = now.saturating_sub(t0).max(0) as u64;
1375            self.query_stats.record(sql, elapsed, now as u64);
1376            // v6.5.6 — slow-query log: fire callback when elapsed
1377            // exceeds the configured floor.
1378            if let (Some(threshold), Some(logger)) =
1379                (self.slow_query_threshold_us, self.slow_query_logger)
1380                && elapsed >= threshold
1381            {
1382                logger(sql, elapsed);
1383            }
1384        }
1385        result
1386    }
1387
1388    fn execute_stmt_with_cancel(
1389        &mut self,
1390        stmt: Statement,
1391        cancel: CancelToken<'_>,
1392    ) -> Result<QueryResult, EngineError> {
1393        cancel.check()?;
1394        let result = match stmt {
1395            Statement::CreateTable(s) => self.exec_create_table(s),
1396            // v7.9.15 — CREATE EXTENSION is a no-op on SPG. Returns
1397            // CommandOk with affected=0; modified_catalog=false so
1398            // the WAL doesn't grow a useless entry. mailrs F3.
1399            Statement::CreateExtension(_) => Ok(QueryResult::CommandOk {
1400                affected: 0,
1401                modified_catalog: false,
1402            }),
1403            // v7.9.27 — DO $$ ... $$ is also a no-op (SPG has no
1404            // PL/pgSQL). mailrs H1 + pg_dump compat.
1405            Statement::DoBlock => Ok(QueryResult::CommandOk {
1406                affected: 0,
1407                modified_catalog: false,
1408            }),
1409            Statement::CreateIndex(s) => self.exec_create_index(s),
1410            Statement::Insert(s) => self.exec_insert(s),
1411            Statement::Update(s) => self.exec_update_cancel(&s, cancel),
1412            Statement::Delete(s) => self.exec_delete_cancel(&s, cancel),
1413            Statement::Select(s) => self.exec_select_cancel(&s, cancel),
1414            Statement::Begin => self.exec_begin(),
1415            Statement::Commit => self.exec_commit(),
1416            Statement::Rollback => self.exec_rollback(),
1417            Statement::Savepoint(name) => self.exec_savepoint(name),
1418            Statement::RollbackToSavepoint(name) => self.exec_rollback_to_savepoint(&name),
1419            Statement::ReleaseSavepoint(name) => self.exec_release_savepoint(&name),
1420            Statement::ShowTables => Ok(self.exec_show_tables()),
1421            Statement::ShowColumns(table) => self.exec_show_columns(&table),
1422            Statement::ShowUsers => Ok(self.exec_show_users()),
1423            Statement::ShowPublications => Ok(self.exec_show_publications()),
1424            Statement::ShowSubscriptions => Ok(self.exec_show_subscriptions()),
1425            Statement::CreateUser(s) => self.exec_create_user(&s),
1426            Statement::DropUser(name) => self.exec_drop_user(&name),
1427            Statement::Explain(e) => self.exec_explain(&e, cancel),
1428            Statement::AlterIndex(s) => self.exec_alter_index(s),
1429            Statement::AlterTable(s) => self.exec_alter_table(s),
1430            Statement::CreatePublication(s) => self.exec_create_publication(s),
1431            Statement::DropPublication(name) => self.exec_drop_publication(&name),
1432            Statement::CreateSubscription(s) => self.exec_create_subscription(s),
1433            Statement::DropSubscription(name) => self.exec_drop_subscription(&name),
1434            // v6.1.7 — WAIT FOR WAL POSITION needs `lag_state`,
1435            // which lives in spg-server's ServerState. The engine
1436            // surfaces a clear error; the server-layer dispatch
1437            // intercepts the SQL before it reaches the engine on
1438            // a server build, so this arm only fires for
1439            // engine-only callers (spg-embedded, lib tests).
1440            Statement::WaitForWalPosition { .. } => Err(EngineError::Unsupported(
1441                "WAIT FOR WAL POSITION must be handled by the server layer".into(),
1442            )),
1443            // v6.2.0 — ANALYZE recomputes per-column histograms.
1444            Statement::Analyze(target) => self.exec_analyze(target.as_deref()),
1445            // v6.7.3 — COMPACT COLD SEGMENTS.
1446            Statement::CompactColdSegments => self.exec_compact_cold_segments(),
1447        };
1448        self.enforce_row_limit(result)
1449    }
1450
1451    /// v6.1.2 — `CREATE PUBLICATION` runtime path. Duplicate names
1452    /// surface as `EngineError::Unsupported` so the existing PG-wire
1453    /// error mapping stays uniform; the message carries the name so
1454    /// operators can grep replication-log noise. Inside-transaction
1455    /// invocation is rejected (matches `CREATE USER` / `DROP USER`
1456    /// stance) — replication-catalog mutation is a connection-level
1457    /// administrative op, not a transactional one.
1458    fn exec_create_publication(
1459        &mut self,
1460        s: CreatePublicationStatement,
1461    ) -> Result<QueryResult, EngineError> {
1462        // v6.1.4 — the v6.1.2 "no DDL inside a transaction" guard
1463        // was over-cautious: it also blocked the auto-commit wrap
1464        // path (which begins an internal TX around every WAL-
1465        // logged statement). PG itself allows CREATE PUBLICATION
1466        // inside a transaction (it rolls back with the TX).
1467        self.publications
1468            .create(s.name, s.scope)
1469            .map_err(|e| EngineError::Unsupported(alloc::format!("CREATE PUBLICATION: {e:?}")))?;
1470        Ok(QueryResult::CommandOk {
1471            affected: 1,
1472            modified_catalog: true,
1473        })
1474    }
1475
1476    /// v6.1.2 — `DROP PUBLICATION` runtime path. PG-compatible silent
1477    /// no-op when the publication doesn't exist (returns `affected=0`
1478    /// in that case so the wire-level command tag distinguishes
1479    /// "dropped" from "no-op", though both succeed).
1480    fn exec_drop_publication(&mut self, name: &str) -> Result<QueryResult, EngineError> {
1481        let removed = self.publications.drop(name);
1482        Ok(QueryResult::CommandOk {
1483            affected: usize::from(removed),
1484            modified_catalog: removed,
1485        })
1486    }
1487
1488    /// v6.1.2 — read access to the publication catalog. Used by
1489    /// the v6.1.5 publisher-side WAL filter, by `SHOW PUBLICATIONS`
1490    /// (v6.1.3+), and by e2e tests that need to assert state without
1491    /// going through the wire.
1492    pub const fn publications(&self) -> &publications::Publications {
1493        &self.publications
1494    }
1495
1496    /// v6.1.4 — `CREATE SUBSCRIPTION` runtime path. Defaults
1497    /// `enabled = true` and `last_received_pos = 0` for a freshly-
1498    /// created subscription. The actual worker thread is spawned
1499    /// by spg-server once the engine returns success.
1500    fn exec_create_subscription(
1501        &mut self,
1502        s: CreateSubscriptionStatement,
1503    ) -> Result<QueryResult, EngineError> {
1504        // See exec_create_publication — the in_transaction gate
1505        // was over-cautious; the auto-commit wrap path holds an
1506        // internal TX that this check was incorrectly blocking.
1507        let sub = subscriptions::Subscription {
1508            conn_str: s.conn_str,
1509            publications: s.publications,
1510            enabled: true,
1511            last_received_pos: 0,
1512        };
1513        self.subscriptions
1514            .create(s.name, sub)
1515            .map_err(|e| EngineError::Unsupported(alloc::format!("CREATE SUBSCRIPTION: {e:?}")))?;
1516        Ok(QueryResult::CommandOk {
1517            affected: 1,
1518            modified_catalog: true,
1519        })
1520    }
1521
1522    /// v6.1.4 — `DROP SUBSCRIPTION`. Silent no-op when the name
1523    /// doesn't exist (PG-compatible). The associated worker is
1524    /// torn down by spg-server when it observes the catalog
1525    /// change at the next snapshot or via the engine's
1526    /// subscriptions accessor (the worker polls the catalog on
1527    /// reconnect; v6.1.5's filter-side will tighten this to an
1528    /// explicit signal).
1529    fn exec_drop_subscription(&mut self, name: &str) -> Result<QueryResult, EngineError> {
1530        let removed = self.subscriptions.drop(name);
1531        Ok(QueryResult::CommandOk {
1532            affected: usize::from(removed),
1533            modified_catalog: removed,
1534        })
1535    }
1536
1537    /// v6.1.4 — read access to the subscription catalog. Used by
1538    /// the subscription worker (read its own row to find its
1539    /// publications + last applied position), by SHOW SUBSCRIPTIONS,
1540    /// and by e2e tests asserting state directly.
1541    pub const fn subscriptions(&self) -> &subscriptions::Subscriptions {
1542        &self.subscriptions
1543    }
1544
1545    /// v6.1.4 — write access to `last_received_pos`. Worker
1546    /// calls this after each apply batch (under the engine's
1547    /// write-lock). Returns `false` when the subscription was
1548    /// dropped between when the worker received the record and
1549    /// when this call landed.
1550    pub fn subscription_advance(&mut self, name: &str, pos: u64) -> bool {
1551        self.subscriptions.update_last_received_pos(name, pos)
1552    }
1553
1554    /// v6.1.4 — `SHOW SUBSCRIPTIONS` row materialisation. Returns
1555    /// `(name, conn_str, publications, enabled, last_received_pos)`
1556    /// ordered by subscription name. The `publications` column is
1557    /// the comma-joined list ("p1, p2") for ergonomic SHOW output;
1558    /// callers wanting structured access read `Engine::subscriptions`.
1559    fn exec_show_subscriptions(&self) -> QueryResult {
1560        let columns = alloc::vec![
1561            ColumnSchema::new("name", DataType::Text, false),
1562            ColumnSchema::new("conn_str", DataType::Text, false),
1563            ColumnSchema::new("publications", DataType::Text, false),
1564            ColumnSchema::new("enabled", DataType::Bool, false),
1565            ColumnSchema::new("last_received_pos", DataType::BigInt, false),
1566        ];
1567        let rows: Vec<Row> = self
1568            .subscriptions
1569            .iter()
1570            .map(|(name, sub)| {
1571                Row::new(alloc::vec![
1572                    Value::Text(name.clone()),
1573                    Value::Text(sub.conn_str.clone()),
1574                    Value::Text(sub.publications.join(", ")),
1575                    Value::Bool(sub.enabled),
1576                    Value::BigInt(i64::try_from(sub.last_received_pos).unwrap_or(i64::MAX)),
1577                ])
1578            })
1579            .collect();
1580        QueryResult::Rows { columns, rows }
1581    }
1582
1583    /// v6.2.0 — materialise `spg_statistic` rows. One row per
1584    /// `(table, column)` pair tracked in `Statistics`, with
1585    /// `histogram_bounds` rendered as a `[v0, v1, ...]` string —
1586    /// the same canonical form vector literals use for round-trip.
1587    fn exec_spg_statistic(&self) -> QueryResult {
1588        let columns = alloc::vec![
1589            ColumnSchema::new("table_name", DataType::Text, false),
1590            ColumnSchema::new("column_name", DataType::Text, false),
1591            ColumnSchema::new("null_frac", DataType::Float, false),
1592            ColumnSchema::new("n_distinct", DataType::BigInt, false),
1593            ColumnSchema::new("histogram_bounds", DataType::Text, false),
1594            // v6.7.0 — appended column (v6.2.0 stability contract
1595            // allows APPEND to spg_statistic, not reorder/rename).
1596            // Reports the cached per-table cold-row count; same
1597            // value across every column row of the same table.
1598            ColumnSchema::new("cold_row_count", DataType::BigInt, false),
1599        ];
1600        let rows: Vec<Row> = self
1601            .statistics
1602            .iter()
1603            .map(|((t, c), s)| {
1604                let cold = self
1605                    .catalog
1606                    .get(t)
1607                    .map_or(0, |table| table.cold_row_count());
1608                Row::new(alloc::vec![
1609                    Value::Text(t.clone()),
1610                    Value::Text(c.clone()),
1611                    Value::Float(f64::from(s.null_frac)),
1612                    Value::BigInt(i64::try_from(s.n_distinct).unwrap_or(i64::MAX)),
1613                    Value::Text(render_histogram_bounds(&s.histogram_bounds)),
1614                    Value::BigInt(i64::try_from(cold).unwrap_or(i64::MAX)),
1615                ])
1616            })
1617            .collect();
1618        QueryResult::Rows { columns, rows }
1619    }
1620
1621    /// v6.5.0 — materialise `spg_stat_replication` rows. One row
1622    /// per subscription with `(name, conn_str, publications,
1623    /// last_received_pos, enabled)`. Surface mirrors
1624    /// `SHOW SUBSCRIPTIONS` but follows the virtual-table dispatch
1625    /// shape so it composes with SELECT clauses (WHERE, projection
1626    /// onto specific columns, etc).
1627    fn exec_spg_stat_replication(&self) -> QueryResult {
1628        let columns = alloc::vec![
1629            ColumnSchema::new("name", DataType::Text, false),
1630            ColumnSchema::new("conn_str", DataType::Text, false),
1631            ColumnSchema::new("publications", DataType::Text, false),
1632            ColumnSchema::new("last_received_pos", DataType::BigInt, false),
1633            ColumnSchema::new("enabled", DataType::Bool, false),
1634        ];
1635        let rows: Vec<Row> = self
1636            .subscriptions
1637            .iter()
1638            .map(|(name, sub)| {
1639                Row::new(alloc::vec![
1640                    Value::Text(name.clone()),
1641                    Value::Text(sub.conn_str.clone()),
1642                    Value::Text(sub.publications.join(",")),
1643                    Value::BigInt(i64::try_from(sub.last_received_pos).unwrap_or(i64::MAX)),
1644                    Value::Bool(sub.enabled),
1645                ])
1646            })
1647            .collect();
1648        QueryResult::Rows { columns, rows }
1649    }
1650
1651    /// v6.5.0 — materialise `spg_stat_segment` rows. One row per
1652    /// cold-tier segment with `(segment_id, num_rows, num_pages,
1653    /// total_bytes)`.
1654    ///
1655    /// v6.7.0 — appended `table_name` column resolves the v6.5.0
1656    /// carve-out. Walks every user table's BTree indices to find
1657    /// which table's Cold locators point at each segment. Empty
1658    /// string for orphan segments (loaded via SPG_PRELOAD_COLD_SEGMENT
1659    /// before any index registered a locator). The walk is
1660    /// O(tables × indices × keys); cached per call, not across
1661    /// calls — re-walked on every `SELECT * FROM spg_stat_segment`.
1662    fn exec_spg_stat_segment(&self) -> QueryResult {
1663        let columns = alloc::vec![
1664            ColumnSchema::new("segment_id", DataType::BigInt, false),
1665            ColumnSchema::new("table_name", DataType::Text, false),
1666            ColumnSchema::new("num_rows", DataType::BigInt, false),
1667            ColumnSchema::new("num_pages", DataType::BigInt, false),
1668            ColumnSchema::new("total_bytes", DataType::BigInt, false),
1669        ];
1670        // v6.7.0 — build a segment_id → table_name map by walking
1671        // every user table's BTree indices once. O(tables × indices
1672        // × keys) for the v6.5.0 carve-out resolution; acceptable
1673        // because spg_stat_segment is operator-facing (not on a
1674        // hot-loop path).
1675        let mut segment_owners: alloc::collections::BTreeMap<u32, String> = BTreeMap::new();
1676        for tname in self.catalog.table_names() {
1677            if is_internal_table_name(&tname) {
1678                continue;
1679            }
1680            let Some(t) = self.catalog.get(&tname) else {
1681                continue;
1682            };
1683            for idx in t.indices() {
1684                if let spg_storage::IndexKind::BTree(map) = &idx.kind {
1685                    for (_, locs) in map.iter() {
1686                        for loc in locs {
1687                            if let spg_storage::RowLocator::Cold { segment_id, .. } = loc {
1688                                segment_owners
1689                                    .entry(*segment_id)
1690                                    .or_insert_with(|| tname.clone());
1691                            }
1692                        }
1693                    }
1694                }
1695            }
1696        }
1697        let rows: Vec<Row> = self
1698            .catalog
1699            .cold_segment_ids_global()
1700            .iter()
1701            .filter_map(|&id| {
1702                let seg = self.catalog.cold_segment(id)?;
1703                let meta = seg.meta();
1704                let owner = segment_owners.get(&id).cloned().unwrap_or_default();
1705                Some(Row::new(alloc::vec![
1706                    Value::BigInt(i64::from(id)),
1707                    Value::Text(owner),
1708                    Value::BigInt(i64::try_from(meta.num_rows).unwrap_or(i64::MAX)),
1709                    Value::BigInt(i64::from(meta.num_pages)),
1710                    Value::BigInt(i64::try_from(meta.total_bytes).unwrap_or(i64::MAX)),
1711                ]))
1712            })
1713            .collect();
1714        QueryResult::Rows { columns, rows }
1715    }
1716
1717    /// v6.5.1 — materialise `spg_stat_query` rows. One row per
1718    /// distinct SQL text recorded since the engine booted, capped
1719    /// at `QUERY_STATS_MAX` (1024). Columns:
1720    ///   sql, exec_count, total_us, mean_us, max_us, last_seen_us
1721    /// mean_us = total_us / exec_count (saturating).
1722    fn exec_spg_stat_query(&self) -> QueryResult {
1723        let columns = alloc::vec![
1724            ColumnSchema::new("sql", DataType::Text, false),
1725            ColumnSchema::new("exec_count", DataType::BigInt, false),
1726            ColumnSchema::new("total_us", DataType::BigInt, false),
1727            ColumnSchema::new("mean_us", DataType::BigInt, false),
1728            ColumnSchema::new("max_us", DataType::BigInt, false),
1729            ColumnSchema::new("last_seen_us", DataType::BigInt, false),
1730        ];
1731        let rows: Vec<Row> = self
1732            .query_stats
1733            .snapshot()
1734            .into_iter()
1735            .map(|(sql, s)| {
1736                let mean = if s.exec_count == 0 {
1737                    0
1738                } else {
1739                    s.total_us / s.exec_count
1740                };
1741                Row::new(alloc::vec![
1742                    Value::Text(sql),
1743                    Value::BigInt(i64::try_from(s.exec_count).unwrap_or(i64::MAX)),
1744                    Value::BigInt(i64::try_from(s.total_us).unwrap_or(i64::MAX)),
1745                    Value::BigInt(i64::try_from(mean).unwrap_or(i64::MAX)),
1746                    Value::BigInt(i64::try_from(s.max_us).unwrap_or(i64::MAX)),
1747                    Value::BigInt(i64::try_from(s.last_seen_us).unwrap_or(i64::MAX)),
1748                ])
1749            })
1750            .collect();
1751        QueryResult::Rows { columns, rows }
1752    }
1753
1754    /// v6.5.2 — register a connection-state provider. spg-server
1755    /// calls this at startup with a function that snapshots its
1756    /// per-pgwire-connection registry. Engine reads through the
1757    /// callback on `SELECT * FROM spg_stat_activity`.
1758    #[must_use]
1759    pub const fn with_activity_provider(mut self, f: ActivityProvider) -> Self {
1760        self.activity_provider = Some(f);
1761        self
1762    }
1763
1764    /// v6.5.3 — register audit chain provider + verifier.
1765    #[must_use]
1766    pub const fn with_audit_providers(
1767        mut self,
1768        chain: AuditChainProvider,
1769        verify: AuditVerifier,
1770    ) -> Self {
1771        self.audit_chain_provider = Some(chain);
1772        self.audit_verifier = Some(verify);
1773        self
1774    }
1775
1776    /// v6.5.6 — register a slow-query log callback. `threshold_us`
1777    /// is the floor (in microseconds); only executes above the floor
1778    /// fire the callback. spg-server wires this from
1779    /// `SPG_SLOW_QUERY_THRESHOLD_MS` (default 100 ms).
1780    #[must_use]
1781    pub const fn with_slow_query_log(mut self, threshold_us: u64, logger: SlowQueryLogger) -> Self {
1782        self.slow_query_threshold_us = Some(threshold_us);
1783        self.slow_query_logger = Some(logger);
1784        self
1785    }
1786
1787    /// v6.5.6 — operator knob for plan cache cap. spg-server reads
1788    /// `SPG_PLAN_CACHE_MAX` env at startup; uses this to override
1789    /// the compile-time default of 256.
1790    pub fn set_plan_cache_max(&mut self, n: usize) {
1791        self.plan_cache.set_max_entries(n);
1792    }
1793
1794    /// v6.5.2 — materialise `spg_stat_activity` rows. Pulls a fresh
1795    /// snapshot from the registered `ActivityProvider`. Returns an
1796    /// empty result set when no provider is registered (the no_std
1797    /// embedded path with no pgwire layer).
1798    fn exec_spg_stat_activity(&self) -> QueryResult {
1799        let columns = alloc::vec![
1800            ColumnSchema::new("pid", DataType::Int, false),
1801            ColumnSchema::new("user", DataType::Text, false),
1802            ColumnSchema::new("started_at_us", DataType::BigInt, false),
1803            ColumnSchema::new("current_sql", DataType::Text, false),
1804            ColumnSchema::new("wait_event", DataType::Text, false),
1805            ColumnSchema::new("elapsed_us", DataType::BigInt, false),
1806            ColumnSchema::new("in_transaction", DataType::Bool, false),
1807        ];
1808        let rows: Vec<Row> = self
1809            .activity_provider
1810            .map(|f| f())
1811            .unwrap_or_default()
1812            .into_iter()
1813            .map(|r| {
1814                Row::new(alloc::vec![
1815                    Value::Int(i32::try_from(r.pid).unwrap_or(i32::MAX)),
1816                    Value::Text(r.user),
1817                    Value::BigInt(r.started_at_us),
1818                    Value::Text(r.current_sql),
1819                    Value::Text(r.wait_event),
1820                    Value::BigInt(r.elapsed_us),
1821                    Value::Bool(r.in_transaction),
1822                ])
1823            })
1824            .collect();
1825        QueryResult::Rows { columns, rows }
1826    }
1827
1828    /// v6.5.4 — materialise `spg_table_ddl` rows. One row per user
1829    /// table with `(table_name, ddl)`. Reconstructed from catalog
1830    /// state on demand.
1831    fn exec_spg_table_ddl(&self) -> QueryResult {
1832        let columns = alloc::vec![
1833            ColumnSchema::new("table_name", DataType::Text, false),
1834            ColumnSchema::new("ddl", DataType::Text, false),
1835        ];
1836        let rows: Vec<Row> = self
1837            .catalog
1838            .table_names()
1839            .into_iter()
1840            .filter(|n| !is_internal_table_name(n))
1841            .filter_map(|name| {
1842                let table = self.catalog.get(&name)?;
1843                let ddl = render_create_table(&name, &table.schema().columns);
1844                Some(Row::new(alloc::vec![Value::Text(name), Value::Text(ddl),]))
1845            })
1846            .collect();
1847        QueryResult::Rows { columns, rows }
1848    }
1849
1850    /// v6.5.4 — materialise `spg_role_ddl` rows. One row per user
1851    /// with `(role_name, ddl)`. Password is redacted (matches the
1852    /// `Statement::CreateUser` Display which prints `'<redacted>'`).
1853    fn exec_spg_role_ddl(&self) -> QueryResult {
1854        let columns = alloc::vec![
1855            ColumnSchema::new("role_name", DataType::Text, false),
1856            ColumnSchema::new("ddl", DataType::Text, false),
1857        ];
1858        let rows: Vec<Row> = self
1859            .users
1860            .iter()
1861            .map(|(name, rec)| {
1862                let ddl = alloc::format!(
1863                    "CREATE USER {name} WITH PASSWORD '<redacted>' ROLE '{}'",
1864                    rec.role.as_str(),
1865                );
1866                Row::new(alloc::vec![
1867                    Value::Text(String::from(name)),
1868                    Value::Text(ddl)
1869                ])
1870            })
1871            .collect();
1872        QueryResult::Rows { columns, rows }
1873    }
1874
1875    /// v6.5.4 — materialise `spg_database_ddl`: single row whose
1876    /// `ddl` column concatenates every user table's CREATE +
1877    /// every role's CREATE in deterministic catalog order. Suitable
1878    /// for piping back through `Engine::execute` to recreate a
1879    /// schema-equivalent database.
1880    fn exec_spg_database_ddl(&self) -> QueryResult {
1881        let columns = alloc::vec![ColumnSchema::new("ddl", DataType::Text, false)];
1882        let mut out = String::new();
1883        for (name, rec) in self.users.iter() {
1884            out.push_str(&alloc::format!(
1885                "CREATE USER {name} WITH PASSWORD '<redacted>' ROLE '{}';\n",
1886                rec.role.as_str(),
1887            ));
1888        }
1889        for name in self.catalog.table_names() {
1890            if is_internal_table_name(&name) {
1891                continue;
1892            }
1893            if let Some(table) = self.catalog.get(&name) {
1894                out.push_str(&render_create_table(&name, &table.schema().columns));
1895                out.push_str(";\n");
1896            }
1897        }
1898        QueryResult::Rows {
1899            columns,
1900            rows: alloc::vec![Row::new(alloc::vec![Value::Text(out)])],
1901        }
1902    }
1903
1904    /// v6.5.3 — materialise `spg_audit_chain` rows. Pulls a fresh
1905    /// snapshot from the registered provider; empty when no
1906    /// provider is set.
1907    fn exec_spg_audit_chain(&self) -> QueryResult {
1908        let columns = alloc::vec![
1909            ColumnSchema::new("seq", DataType::BigInt, false),
1910            ColumnSchema::new("ts_ms", DataType::BigInt, false),
1911            ColumnSchema::new("prev_hash", DataType::Text, false),
1912            ColumnSchema::new("entry_hash", DataType::Text, false),
1913            ColumnSchema::new("sql", DataType::Text, false),
1914        ];
1915        let rows: Vec<Row> = self
1916            .audit_chain_provider
1917            .map(|f| f())
1918            .unwrap_or_default()
1919            .into_iter()
1920            .map(|r| {
1921                Row::new(alloc::vec![
1922                    Value::BigInt(r.seq),
1923                    Value::BigInt(r.ts_ms),
1924                    Value::Text(r.prev_hash_hex),
1925                    Value::Text(r.entry_hash_hex),
1926                    Value::Text(r.sql),
1927                ])
1928            })
1929            .collect();
1930        QueryResult::Rows { columns, rows }
1931    }
1932
1933    /// v6.5.3 — materialise `spg_audit_verify` single-row result.
1934    /// `(verified_count, broken_at_seq)` — broken_at_seq is `-1`
1935    /// on a clean chain. Returns one row with both values 0 when
1936    /// no verifier is registered (no-data fallback for embedded
1937    /// callers).
1938    fn exec_spg_audit_verify(&self) -> QueryResult {
1939        let columns = alloc::vec![
1940            ColumnSchema::new("verified_count", DataType::BigInt, false),
1941            ColumnSchema::new("broken_at_seq", DataType::BigInt, false),
1942        ];
1943        let (verified, broken) = self.audit_verifier.map(|f| f()).unwrap_or((0, -1));
1944        let row = Row::new(alloc::vec![Value::BigInt(verified), Value::BigInt(broken),]);
1945        QueryResult::Rows {
1946            columns,
1947            rows: alloc::vec![row],
1948        }
1949    }
1950
1951    /// v6.5.1 — read-only accessor for tests + v6.5.6 ops resets.
1952    pub fn query_stats(&self) -> &query_stats::QueryStats {
1953        &self.query_stats
1954    }
1955
1956    /// v6.5.1 — mutable accessor (clear, etc).
1957    pub fn query_stats_mut(&mut self) -> &mut query_stats::QueryStats {
1958        &mut self.query_stats
1959    }
1960
1961    /// v6.2.0 — read access to the per-column statistics table.
1962    /// Used by the planner (v6.2.2 selectivity functions read this),
1963    /// by `SELECT * FROM spg_statistic`, and by e2e tests.
1964    pub const fn statistics(&self) -> &statistics::Statistics {
1965        &self.statistics
1966    }
1967
1968    /// v6.2.1 — return tables whose modified-row count crossed the
1969    /// auto-analyze threshold since the last ANALYZE on that table.
1970    /// The threshold is `0.1 × max(row_count, MIN_ROWS_FOR_AUTO_
1971    /// ANALYZE)` — combines PG-style fractional + absolute lower
1972    /// bound so a fresh / tiny table doesn't get hammered on every
1973    /// INSERT.
1974    ///
1975    /// Designed to be cheap: walks every user table's
1976    /// `Catalog::table_names()` + reads `statistics::modified_
1977    /// since_last_analyze()` (BTreeMap lookup). The background
1978    /// worker calls this under `engine.read()` then drops the lock
1979    /// before re-acquiring `engine.write()` for the actual ANALYZE.
1980    pub fn tables_needing_analyze(&self) -> Vec<String> {
1981        const MIN_ROWS: u64 = 100;
1982        let mut out = Vec::new();
1983        for name in self.catalog.table_names() {
1984            if is_internal_table_name(&name) {
1985                continue;
1986            }
1987            let Some(table) = self.catalog.get(&name) else {
1988                continue;
1989            };
1990            let row_count = table.rows().len() as u64;
1991            let modified = self.statistics.modified_since_last_analyze(&name);
1992            // Threshold: ceil(0.1 × max(row_count, MIN_ROWS)),
1993            // computed in integer arithmetic so spg-engine stays
1994            // no_std without pulling in libm. `(n + 9) / 10` is
1995            // `ceil(n / 10)` for non-negative `n`.
1996            let base = row_count.max(MIN_ROWS);
1997            let threshold = base.saturating_add(9) / 10;
1998            if modified >= threshold {
1999                out.push(name);
2000            }
2001        }
2002        out
2003    }
2004
2005    /// v6.2.0 — `ANALYZE [<table>]` runtime. Bare `ANALYZE` walks
2006    /// every user table; `ANALYZE <name>` re-stats one. For each
2007    /// target table, single-pass scan + per-column histogram +
2008    /// `null_frac` + `n_distinct`. Replaces the table's prior
2009    /// stats; resets the modified-row counter.
2010    ///
2011    /// v6.2.0 doesn't sample — it scans the full table. v6.2.x
2012    /// can add reservoir sampling at the > 100 K-row mark; not a
2013    /// scope blocker for the current commit since rows ≤ 100 K
2014    /// analyse in milliseconds.
2015    fn exec_analyze(&mut self, target: Option<&str>) -> Result<QueryResult, EngineError> {
2016        let names: Vec<String> = if let Some(name) = target {
2017            // Verify the table exists; surface a clear error if not.
2018            if self.catalog.get(name).is_none() {
2019                return Err(EngineError::Storage(StorageError::TableNotFound {
2020                    name: name.to_string(),
2021                }));
2022            }
2023            alloc::vec![name.to_string()]
2024        } else {
2025            self.catalog
2026                .table_names()
2027                .into_iter()
2028                .filter(|n| !is_internal_table_name(n))
2029                .collect()
2030        };
2031        let mut analysed = 0usize;
2032        for table_name in &names {
2033            self.analyze_one_table(table_name)?;
2034            analysed += 1;
2035        }
2036        // v6.3.1 — plan cache invalidation. Bump stats version so
2037        // future lookups see the new generation, and selectively
2038        // evict every plan whose `source_tables` overlap with the
2039        // ANALYZE target set. Bare ANALYZE (all tables) clears the
2040        // whole cache.
2041        if analysed > 0 {
2042            self.statistics.bump_version();
2043            if target.is_some() {
2044                for t in &names {
2045                    self.plan_cache.evict_referencing(t);
2046                }
2047            } else {
2048                self.plan_cache.clear();
2049            }
2050        }
2051        Ok(QueryResult::CommandOk {
2052            affected: analysed,
2053            modified_catalog: true,
2054        })
2055    }
2056
2057    /// v6.7.3 — `COMPACT COLD SEGMENTS` runtime path. Drives the
2058    /// engine-layer compaction shim with the default
2059    /// 4 MiB segment-size threshold. spg-server intercepts the
2060    /// SQL before it reaches the engine on a server build —
2061    /// it reads `SPG_COMPACTION_TARGET_SEGMENT_BYTES`, calls
2062    /// `Engine::compact_cold_segments_with_target` directly with
2063    /// the env value, and persists every merged segment to
2064    /// `<db>.spg/segments/`. This arm only fires for engine-only
2065    /// callers (spg-embedded, lib tests); in that mode merged
2066    /// segments live in memory and are dropped at process exit.
2067    fn exec_compact_cold_segments(&mut self) -> Result<QueryResult, EngineError> {
2068        let target = COMPACTION_TARGET_DEFAULT_BYTES;
2069        let reports = self.compact_cold_segments_with_target(target)?;
2070        let columns = alloc::vec![
2071            ColumnSchema::new("table_name", DataType::Text, false),
2072            ColumnSchema::new("index_name", DataType::Text, false),
2073            ColumnSchema::new("sources_merged", DataType::BigInt, false),
2074            ColumnSchema::new("merged_segment_id", DataType::BigInt, false),
2075            ColumnSchema::new("merged_rows", DataType::BigInt, false),
2076            ColumnSchema::new("deleted_rows_pruned", DataType::BigInt, false),
2077            ColumnSchema::new("bytes_reclaimed_estimate", DataType::BigInt, false),
2078        ];
2079        let rows: Vec<Row> = reports
2080            .into_iter()
2081            .map(|(tname, iname, report)| {
2082                Row::new(alloc::vec![
2083                    Value::Text(tname),
2084                    Value::Text(iname),
2085                    Value::BigInt(i64::try_from(report.sources.len()).unwrap_or(i64::MAX)),
2086                    Value::BigInt(i64::from(report.merged_segment_id.unwrap_or(0))),
2087                    Value::BigInt(i64::try_from(report.merged_rows).unwrap_or(i64::MAX)),
2088                    Value::BigInt(i64::try_from(report.deleted_rows_pruned).unwrap_or(i64::MAX),),
2089                    Value::BigInt(
2090                        i64::try_from(report.bytes_reclaimed_estimate).unwrap_or(i64::MAX),
2091                    ),
2092                ])
2093            })
2094            .collect();
2095        Ok(QueryResult::Rows { columns, rows })
2096    }
2097
2098    /// Walk a single table's rows once and (re-)populate per-column
2099    /// stats. Drops the existing stats for `table` first so columns
2100    /// that have been DROP-ed between ANALYZEs don't leave stale
2101    /// rows.
2102    fn analyze_one_table(&mut self, table_name: &str) -> Result<(), EngineError> {
2103        let table = self.catalog.get(table_name).ok_or_else(|| {
2104            EngineError::Storage(StorageError::TableNotFound {
2105                name: table_name.to_string(),
2106            })
2107        })?;
2108        let schema = table.schema().clone();
2109        let row_count = table.rows().len();
2110        // For each column, collect (sorted) non-NULL textual values
2111        // + count NULLs; then ask `statistics::build_histogram` to
2112        // produce the 101 bounds and `estimate_n_distinct` the
2113        // distinct count.
2114        self.statistics.clear_table(table_name);
2115        for (col_pos, col_schema) in schema.columns.iter().enumerate() {
2116            // v6.2.0 skip: vector columns have their own stats
2117            // shape (HNSW graph topology). v6.2 deliberation #1.
2118            if matches!(col_schema.ty, DataType::Vector { .. }) {
2119                continue;
2120            }
2121            let mut non_null_values: Vec<Value> = Vec::with_capacity(row_count);
2122            let mut nulls: u64 = 0;
2123            for row in table.rows() {
2124                match row.values.get(col_pos) {
2125                    Some(Value::Null) | None => nulls += 1,
2126                    Some(v) => non_null_values.push(v.clone()),
2127                }
2128            }
2129            // Sort by type-aware ordering (Int as int, Text as
2130            // lex, etc.) so histogram bounds reflect the column's
2131            // natural order — not lexicographic on the string
2132            // representation, which would put "9" after "49".
2133            non_null_values.sort_by(|a, b| sort_values_for_histogram(a, b));
2134            let non_null: Vec<String> = non_null_values.iter().map(canonical_value_repr).collect();
2135            let null_frac = if row_count == 0 {
2136                0.0
2137            } else {
2138                #[allow(clippy::cast_precision_loss)]
2139                let f = nulls as f32 / row_count as f32;
2140                f
2141            };
2142            let n_distinct = statistics::estimate_n_distinct(&non_null);
2143            let histogram_bounds = statistics::build_histogram(&non_null);
2144            self.statistics.set(
2145                table_name.to_string(),
2146                col_schema.name.clone(),
2147                statistics::ColumnStats {
2148                    null_frac,
2149                    n_distinct,
2150                    histogram_bounds,
2151                },
2152            );
2153        }
2154        self.statistics.reset_modified(table_name);
2155        // v6.7.0 — refresh the per-table cold_rows cache. Walk the
2156        // BTree indices and count Cold locators (MAX across
2157        // indices); store the result on the table. Surfaced via
2158        // `spg_statistic.cold_row_count` (new column) and
2159        // `spg_stat_segment.table_name` (new column).
2160        let cold_count = {
2161            let table = self
2162                .active_catalog()
2163                .get(table_name)
2164                .expect("table still present");
2165            table.count_cold_locators()
2166        };
2167        let table_mut = self
2168            .active_catalog_mut()
2169            .get_mut(table_name)
2170            .expect("table still present");
2171        table_mut.set_cold_row_count(cold_count);
2172        Ok(())
2173    }
2174
2175    /// v6.1.3 — `SHOW PUBLICATIONS` row materialisation. Returns
2176    /// `(name, scope, table_count)` ordered by publication name.
2177    ///   - `scope` is the human-readable string:
2178    ///       `"FOR ALL TABLES"` /
2179    ///       `"FOR TABLE t1, t2"` /
2180    ///       `"FOR ALL TABLES EXCEPT t1, t2"`.
2181    ///   - `table_count` is NULL for `AllTables`, the list length
2182    ///     otherwise. NULLability lets clients distinguish "publish
2183    ///     everything" from "publish exactly 0 tables" (the v6.1.3
2184    ///     parser forbids the empty list, but the column shape is
2185    ///     ready for the v6.1.5 publisher-side semantics).
2186    fn exec_show_publications(&self) -> QueryResult {
2187        let columns = alloc::vec![
2188            ColumnSchema::new("name", DataType::Text, false),
2189            ColumnSchema::new("scope", DataType::Text, false),
2190            ColumnSchema::new("table_count", DataType::Int, true),
2191        ];
2192        let rows: Vec<Row> = self
2193            .publications
2194            .iter()
2195            .map(|(name, scope)| {
2196                let (scope_str, count_val) = match scope {
2197                    spg_sql::ast::PublicationScope::AllTables => {
2198                        ("FOR ALL TABLES".to_string(), Value::Null)
2199                    }
2200                    spg_sql::ast::PublicationScope::ForTables(ts) => (
2201                        alloc::format!("FOR TABLE {}", ts.join(", ")),
2202                        Value::Int(i32::try_from(ts.len()).unwrap_or(i32::MAX)),
2203                    ),
2204                    spg_sql::ast::PublicationScope::AllTablesExcept(ts) => (
2205                        alloc::format!("FOR ALL TABLES EXCEPT {}", ts.join(", ")),
2206                        Value::Int(i32::try_from(ts.len()).unwrap_or(i32::MAX)),
2207                    ),
2208                };
2209                Row::new(alloc::vec![
2210                    Value::Text(name.clone()),
2211                    Value::Text(scope_str),
2212                    count_val,
2213                ])
2214            })
2215            .collect();
2216        QueryResult::Rows { columns, rows }
2217    }
2218
2219    /// v4.1 `SHOW USERS` — `(name, role)` per row, ordered by name.
2220    fn exec_show_users(&self) -> QueryResult {
2221        let columns = alloc::vec![
2222            ColumnSchema::new("name", DataType::Text, false),
2223            ColumnSchema::new("role", DataType::Text, false),
2224        ];
2225        let rows: Vec<Row> = self
2226            .users
2227            .iter()
2228            .map(|(name, rec)| {
2229                Row::new(alloc::vec![
2230                    Value::Text(name.to_string()),
2231                    Value::Text(rec.role.as_str().to_string()),
2232                ])
2233            })
2234            .collect();
2235        QueryResult::Rows { columns, rows }
2236    }
2237
2238    fn exec_create_user(&mut self, s: &CreateUserStatement) -> Result<QueryResult, EngineError> {
2239        if self.in_transaction() {
2240            return Err(EngineError::Unsupported(
2241                "CREATE USER is not allowed inside a transaction".into(),
2242            ));
2243        }
2244        let role = users::Role::parse(&s.role).ok_or_else(|| {
2245            EngineError::Unsupported(alloc::format!("invalid role: {:?}", s.role))
2246        })?;
2247        // Prefer the host-injected RNG. Falls back to a deterministic
2248        // salt derived from the username only when no RNG is wired —
2249        // acceptable for tests; the server always installs one.
2250        let salt = self.salt_fn.map_or_else(
2251            || {
2252                let mut s_bytes = [0u8; 16];
2253                let digest = spg_crypto::hash(s.name.as_bytes());
2254                s_bytes.copy_from_slice(&digest[..16]);
2255                s_bytes
2256            },
2257            |f| f(),
2258        );
2259        self.users
2260            .create(&s.name, &s.password, role, salt)
2261            .map_err(|e| EngineError::Unsupported(alloc::format!("CREATE USER: {e}")))?;
2262        Ok(QueryResult::CommandOk {
2263            affected: 1,
2264            modified_catalog: true,
2265        })
2266    }
2267
2268    fn exec_drop_user(&mut self, name: &str) -> Result<QueryResult, EngineError> {
2269        if self.in_transaction() {
2270            return Err(EngineError::Unsupported(
2271                "DROP USER is not allowed inside a transaction".into(),
2272            ));
2273        }
2274        self.users
2275            .drop(name)
2276            .map_err(|e| EngineError::Unsupported(alloc::format!("DROP USER: {e}")))?;
2277        Ok(QueryResult::CommandOk {
2278            affected: 1,
2279            modified_catalog: true,
2280        })
2281    }
2282
2283    /// v4.4 `UPDATE <table> SET col = expr [, ...] [WHERE cond]`.
2284    /// Filter pass uses the same WHERE eval as `exec_select`. Per
2285    /// matched row, evaluate each RHS expression against the *old*
2286    /// row, then call `Table::update_row` which rebuilds indices.
2287    /// Indexed columns are correctly reflected because rebuild
2288    /// happens after the cell rewrite.
2289    fn exec_update_cancel(
2290        &mut self,
2291        stmt: &spg_sql::ast::UpdateStatement,
2292        cancel: CancelToken<'_>,
2293    ) -> Result<QueryResult, EngineError> {
2294        // v5.2.3: if the WHERE is a PK equality and matches a cold-
2295        // tier row, promote it back to the hot tier *before* the
2296        // hot-row walk. The promote pushes the row to the end of
2297        // `table.rows`, where the upcoming SET-evaluation loop will
2298        // pick it up and apply the assignments. Lookups for the key
2299        // never observe a gap because `promote_cold_row` inserts the
2300        // hot row before retiring the cold locator.
2301        if let Some(w) = &stmt.where_ {
2302            let schema_cols = self
2303                .active_catalog()
2304                .get(&stmt.table)
2305                .ok_or_else(|| {
2306                    EngineError::Storage(StorageError::TableNotFound {
2307                        name: stmt.table.clone(),
2308                    })
2309                })?
2310                .schema()
2311                .columns
2312                .clone();
2313            if let Some((col_pos, key)) = try_pk_predicate(w, &schema_cols, stmt.table.as_str())
2314                && let Some(idx_name) = self
2315                    .active_catalog()
2316                    .get(&stmt.table)
2317                    .and_then(|t| t.index_on(col_pos).map(|i| i.name.clone()))
2318            {
2319                // Promote may be a no-op (key is hot-only or absent);
2320                // we don't care about the return value here — the
2321                // subsequent hot walk will either match or not.
2322                let _ = self
2323                    .active_catalog_mut()
2324                    .promote_cold_row(&stmt.table, &idx_name, &key);
2325            }
2326        }
2327
2328        let table = self
2329            .active_catalog_mut()
2330            .get_mut(&stmt.table)
2331            .ok_or_else(|| {
2332                EngineError::Storage(StorageError::TableNotFound {
2333                    name: stmt.table.clone(),
2334                })
2335            })?;
2336        let schema_cols: Vec<ColumnSchema> = table.schema().columns.clone();
2337        // Resolve each SET target to a column position once, validate
2338        // up front so a typo'd column doesn't leave a partial mutation
2339        // behind.
2340        let mut targets: Vec<(usize, &Expr)> = Vec::with_capacity(stmt.assignments.len());
2341        for (col, expr) in &stmt.assignments {
2342            let pos = schema_cols
2343                .iter()
2344                .position(|c| c.name == *col)
2345                .ok_or_else(|| {
2346                    EngineError::Eval(EvalError::ColumnNotFound { name: col.clone() })
2347                })?;
2348            targets.push((pos, expr));
2349        }
2350        let ctx = EvalContext::new(&schema_cols, Some(stmt.table.as_str()));
2351        // Walk every row, evaluate WHERE then SET expressions. We
2352        // gather (position, new_values) tuples first and apply them
2353        // afterwards so the WHERE/RHS evaluation reads the original
2354        // row state — matches PG semantics (UPDATE doesn't see its
2355        // own writes).
2356        let mut planned: Vec<(usize, Vec<Value>)> = Vec::new();
2357        for (i, row) in table.rows().iter().enumerate() {
2358            // v4.5: cooperative cancel checkpoint every 256 rows so
2359            // a runaway UPDATE without WHERE doesn't drag past the
2360            // server's query-timeout watchdog.
2361            if i.is_multiple_of(256) {
2362                cancel.check()?;
2363            }
2364            if let Some(w) = &stmt.where_ {
2365                let cond = eval::eval_expr(w, row, &ctx)?;
2366                if !matches!(cond, Value::Bool(true)) {
2367                    continue;
2368                }
2369            }
2370            let mut new_vals = row.values.clone();
2371            for (pos, expr) in &targets {
2372                let v = eval::eval_expr(expr, row, &ctx)?;
2373                new_vals[*pos] =
2374                    coerce_value(v, schema_cols[*pos].ty, &schema_cols[*pos].name, *pos)?;
2375            }
2376            planned.push((i, new_vals));
2377        }
2378        // v7.6.6 — capture pre-update row values for the FK
2379        // enforcement passes below. `planned` carries new values
2380        // only; pair them with the old row.
2381        let plan_with_old: Vec<(usize, Vec<Value>, Vec<Value>)> = planned
2382            .iter()
2383            .map(|(pos, new_vals)| (*pos, table.rows()[*pos].values.clone(), new_vals.clone()))
2384            .collect();
2385        let self_fks = table.schema().foreign_keys.clone();
2386        let affected = planned.len();
2387        // Release mutable borrow on `table` for the FK passes.
2388        let _ = table;
2389        // v7.6.6 — Stage 2a: outbound FK check. For every row whose
2390        // local FK columns changed, the new value must exist in the
2391        // parent.
2392        if !self_fks.is_empty() {
2393            let new_rows: Vec<Vec<Value>> = planned
2394                .iter()
2395                .map(|(_pos, new_vals)| new_vals.clone())
2396                .collect();
2397            enforce_fk_inserts(self.active_catalog(), &stmt.table, &self_fks, &new_rows)?;
2398        }
2399        // v7.6.6 — Stage 2b: inbound FK check. For every row that
2400        // changed value in a column that *some other table* uses as
2401        // a FK parent column, react per `on_update` action.
2402        let child_plan =
2403            plan_fk_parent_updates(self.active_catalog(), &stmt.table, &plan_with_old)?;
2404        // Stage 3a — apply each child-side action.
2405        for step in &child_plan {
2406            apply_fk_child_step(self.active_catalog_mut(), step)?;
2407        }
2408        // Stage 3b — apply the original UPDATE.
2409        let table = self
2410            .active_catalog_mut()
2411            .get_mut(&stmt.table)
2412            .ok_or_else(|| {
2413                EngineError::Storage(StorageError::TableNotFound {
2414                    name: stmt.table.clone(),
2415                })
2416            })?;
2417        // v7.9.4 — snapshot post-update values for RETURNING.
2418        let updated_for_returning: Vec<Vec<Value>> = if stmt.returning.is_some() {
2419            planned.iter().map(|(_pos, vals)| vals.clone()).collect()
2420        } else {
2421            Vec::new()
2422        };
2423        for (pos, vals) in planned {
2424            table.update_row(pos, vals)?;
2425        }
2426        let _ = table;
2427        // v6.2.1 — auto-analyze modified-row tracking for UPDATE.
2428        if !self.in_transaction() && affected > 0 {
2429            self.statistics
2430                .record_modifications(&stmt.table, affected as u64);
2431        }
2432        // v7.9.4 — RETURNING projection.
2433        if let Some(items) = &stmt.returning {
2434            return self.build_returning_rows(&stmt.table, items, updated_for_returning);
2435        }
2436        Ok(QueryResult::CommandOk {
2437            affected,
2438            modified_catalog: !self.in_transaction(),
2439        })
2440    }
2441
2442    /// v4.4 `DELETE FROM <table> [WHERE cond]`. Collects matching
2443    /// positions then delegates to `Table::delete_rows` (single index
2444    /// rebuild for the batch).
2445    fn exec_delete_cancel(
2446        &mut self,
2447        stmt: &spg_sql::ast::DeleteStatement,
2448        cancel: CancelToken<'_>,
2449    ) -> Result<QueryResult, EngineError> {
2450        // v5.2.3: PK-targeted DELETE → first retire any cold-tier
2451        // locator for the key. The cold row body stays in the
2452        // segment (becoming shadowed garbage that a future
2453        // compaction pass reclaims) but the index no longer
2454        // resolves it. The shadow count contributes to the
2455        // affected total; the subsequent hot walk handles any hot
2456        // rows for the same key.
2457        let mut cold_shadow_count: usize = 0;
2458        if let Some(w) = &stmt.where_ {
2459            let schema_cols = self
2460                .active_catalog()
2461                .get(&stmt.table)
2462                .ok_or_else(|| {
2463                    EngineError::Storage(StorageError::TableNotFound {
2464                        name: stmt.table.clone(),
2465                    })
2466                })?
2467                .schema()
2468                .columns
2469                .clone();
2470            if let Some((col_pos, key)) = try_pk_predicate(w, &schema_cols, stmt.table.as_str())
2471                && let Some(idx_name) = self
2472                    .active_catalog()
2473                    .get(&stmt.table)
2474                    .and_then(|t| t.index_on(col_pos).map(|i| i.name.clone()))
2475            {
2476                cold_shadow_count = self
2477                    .active_catalog_mut()
2478                    .shadow_cold_row(&stmt.table, &idx_name, &key)
2479                    .unwrap_or(0);
2480            }
2481        }
2482
2483        let table = self
2484            .active_catalog_mut()
2485            .get_mut(&stmt.table)
2486            .ok_or_else(|| {
2487                EngineError::Storage(StorageError::TableNotFound {
2488                    name: stmt.table.clone(),
2489                })
2490            })?;
2491        let schema_cols: Vec<ColumnSchema> = table.schema().columns.clone();
2492        let ctx = EvalContext::new(&schema_cols, Some(stmt.table.as_str()));
2493        let mut positions: Vec<usize> = Vec::new();
2494        // v7.6.3 — collect every to-delete row's full Value tuple
2495        // alongside its position, so the FK enforcement pass can
2496        // run after the mut borrow drops.
2497        let mut to_delete_rows: Vec<Vec<Value>> = Vec::new();
2498        for (i, row) in table.rows().iter().enumerate() {
2499            if i.is_multiple_of(256) {
2500                cancel.check()?;
2501            }
2502            let keep = if let Some(w) = &stmt.where_ {
2503                let cond = eval::eval_expr(w, row, &ctx)?;
2504                !matches!(cond, Value::Bool(true))
2505            } else {
2506                false
2507            };
2508            if !keep {
2509                positions.push(i);
2510                to_delete_rows.push(row.values.clone());
2511            }
2512        }
2513        // v7.6.3 / v7.6.4 — Stage 2: FK enforcement on the immutable
2514        // catalog. Release the mut borrow and run reverse-scan
2515        // against every child table whose FK targets this table.
2516        // RESTRICT / NoAction raise an error; CASCADE returns a
2517        // cascade plan that stage 3 applies after the primary delete.
2518        // SET NULL / SET DEFAULT remain Unsupported until v7.6.5.
2519        let _ = table;
2520        let cascade_plan = plan_fk_parent_deletions(
2521            self.active_catalog(),
2522            &stmt.table,
2523            &positions,
2524            &to_delete_rows,
2525        )?;
2526        // Stage 3a — apply each FK child step (SET NULL / SET
2527        // DEFAULT / CASCADE delete) before deleting the parent.
2528        // The plan is already ordered: nulls/defaults first, then
2529        // cascade deletes (so a row mutated and later deleted
2530        // surfaces as deleted — though v7.6.5 doesn't produce
2531        // that overlap today).
2532        for step in &cascade_plan {
2533            apply_fk_child_step(self.active_catalog_mut(), step)?;
2534        }
2535        // Stage 3b — actually delete the original target rows.
2536        let table = self
2537            .active_catalog_mut()
2538            .get_mut(&stmt.table)
2539            .ok_or_else(|| {
2540                EngineError::Storage(StorageError::TableNotFound {
2541                    name: stmt.table.clone(),
2542                })
2543            })?;
2544        let affected = table.delete_rows(&positions) + cold_shadow_count;
2545        let _ = table;
2546        // v6.2.1 — auto-analyze modified-row tracking for DELETE.
2547        if !self.in_transaction() && affected > 0 {
2548            self.statistics
2549                .record_modifications(&stmt.table, affected as u64);
2550        }
2551        // v7.9.4 — RETURNING projection over the soon-to-be-gone
2552        // rows. `to_delete_rows` was snapshotted in stage 1 before
2553        // mutation, so the projection sees the pre-delete state
2554        // (matches PG semantics: DELETE RETURNING returns the row
2555        // as it was just before removal).
2556        if let Some(items) = &stmt.returning {
2557            return self.build_returning_rows(&stmt.table, items, to_delete_rows);
2558        }
2559        Ok(QueryResult::CommandOk {
2560            affected,
2561            modified_catalog: !self.in_transaction(),
2562        })
2563    }
2564
2565    /// `SHOW TABLES` — one row per table in the active catalog.
2566    /// Column name is `name` so result-set consumers can downstream
2567    /// `SELECT name FROM ...` style logic if needed.
2568    /// v4.26: `EXPLAIN [ANALYZE] <select>`. Returns a single-column
2569    /// `QUERY PLAN` text table — first line names the top operator
2570    /// (Scan / Aggregate / Window / etc.), indented children list
2571    /// FROM joins, WHERE filters, ORDER BY / LIMIT, projection
2572    /// shape, and any active index hits. `ANALYZE` execs the inner
2573    /// SELECT and appends actual-row + elapsed-micros annotations.
2574    #[allow(clippy::format_push_string)]
2575    fn exec_explain(
2576        &self,
2577        e: &spg_sql::ast::ExplainStatement,
2578        cancel: CancelToken<'_>,
2579    ) -> Result<QueryResult, EngineError> {
2580        let mut lines = Vec::<String>::new();
2581        explain_select(&e.inner, self, 0, &mut lines);
2582        if e.suggest {
2583            // v6.8.3 — index advisor. Walks the SELECT's FROM
2584            // tables + WHERE column refs; for each (table, column)
2585            // pair that lacks an index, append a SUGGEST line with
2586            // a copy-pastable `CREATE INDEX` statement. This is a
2587            // pure-syntax heuristic — no cardinality estimation —
2588            // matching the v6.8.3 design intent of "tell the
2589            // operator where indexes are missing", not "give the
2590            // mathematically optimal index set".
2591            let suggestions = build_index_suggestions(&e.inner, self);
2592            for s in suggestions {
2593                lines.push(s);
2594            }
2595        } else if e.analyze {
2596            // v6.2.4 — EXPLAIN ANALYZE annotates each operator line
2597            // with `(rows=N)` where the row count is computable
2598            // without re-executing the full query:
2599            //   - Top-level operator (first non-indented line):
2600            //     rows = final result.len()
2601            //   - "From: <table> [full scan]" lines: rows =
2602            //     table.rows().len() (catalog read; no execution)
2603            //   - "From: <table> [index seek]": indeterminate —
2604            //     the index step would need re-execution; v6.2.5
2605            //     adds per-operator wall-clock + hot/cold rows
2606            //     instrumentation that makes this concrete.
2607            //   - Everything else: marked `(—)` so the surface
2608            //     stays well-defined without silently dropping
2609            //     stats. v6.2.5 fills in via inline executor
2610            //     instrumentation.
2611            // Total elapsed lands on a trailing `Total: …` line.
2612            let started = self.clock.map(|f| f());
2613            let exec = self.exec_select_cancel(&e.inner, cancel)?;
2614            let elapsed_micros = match (self.clock, started) {
2615                (Some(f), Some(s)) => Some(f().saturating_sub(s)),
2616                _ => None,
2617            };
2618            let row_count = if let QueryResult::Rows { rows, .. } = &exec {
2619                rows.len()
2620            } else {
2621                0
2622            };
2623            annotate_explain_lines(&mut lines, row_count, self);
2624            let mut total = alloc::format!("Total: rows={row_count}");
2625            if let Some(us) = elapsed_micros {
2626                total.push_str(&alloc::format!(" elapsed={us}us"));
2627            }
2628            lines.push(total);
2629        }
2630        let columns = alloc::vec![ColumnSchema::new("QUERY PLAN", DataType::Text, false)];
2631        let rows: Vec<Row> = lines
2632            .into_iter()
2633            .map(|l| Row::new(alloc::vec![Value::Text(l)]))
2634            .collect();
2635        Ok(QueryResult::Rows { columns, rows })
2636    }
2637
2638    fn exec_show_tables(&self) -> QueryResult {
2639        let columns = alloc::vec![ColumnSchema::new("name", DataType::Text, false)];
2640        let rows: Vec<Row> = self
2641            .active_catalog()
2642            .table_names()
2643            .into_iter()
2644            .map(|n| Row::new(alloc::vec![Value::Text(n)]))
2645            .collect();
2646        QueryResult::Rows { columns, rows }
2647    }
2648
2649    /// `SHOW COLUMNS FROM <table>` — one row per column with the
2650    /// declared name, SQL type rendering, and nullability flag.
2651    fn exec_show_columns(&self, table_name: &str) -> Result<QueryResult, EngineError> {
2652        let table =
2653            self.active_catalog()
2654                .get(table_name)
2655                .ok_or_else(|| StorageError::TableNotFound {
2656                    name: table_name.into(),
2657                })?;
2658        let columns = alloc::vec![
2659            ColumnSchema::new("name", DataType::Text, false),
2660            ColumnSchema::new("type", DataType::Text, false),
2661            ColumnSchema::new("nullable", DataType::Bool, false),
2662        ];
2663        let rows: Vec<Row> = table
2664            .schema()
2665            .columns
2666            .iter()
2667            .map(|c| {
2668                Row::new(alloc::vec![
2669                    Value::Text(c.name.clone()),
2670                    Value::Text(alloc::format!("{}", c.ty)),
2671                    Value::Bool(c.nullable),
2672                ])
2673            })
2674            .collect();
2675        Ok(QueryResult::Rows { columns, rows })
2676    }
2677
2678    fn exec_begin(&mut self) -> Result<QueryResult, EngineError> {
2679        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
2680        if self.tx_catalogs.contains_key(&tx_id) {
2681            return Err(EngineError::TransactionAlreadyOpen);
2682        }
2683        self.tx_catalogs.insert(
2684            tx_id,
2685            TxState {
2686                catalog: self.catalog.clone(),
2687                savepoints: Vec::new(),
2688            },
2689        );
2690        Ok(QueryResult::CommandOk {
2691            affected: 0,
2692            modified_catalog: false,
2693        })
2694    }
2695
2696    fn exec_commit(&mut self) -> Result<QueryResult, EngineError> {
2697        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
2698        let state = self
2699            .tx_catalogs
2700            .remove(&tx_id)
2701            .ok_or(EngineError::NoActiveTransaction)?;
2702        self.catalog = state.catalog;
2703        // All savepoints become permanent at COMMIT and the stack
2704        // resets for the next TX (`state.savepoints` is discarded with
2705        // `state`).
2706        Ok(QueryResult::CommandOk {
2707            affected: 0,
2708            modified_catalog: true,
2709        })
2710    }
2711
2712    fn exec_rollback(&mut self) -> Result<QueryResult, EngineError> {
2713        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
2714        if self.tx_catalogs.remove(&tx_id).is_none() {
2715            return Err(EngineError::NoActiveTransaction);
2716        }
2717        // savepoints discarded with the TxState
2718        Ok(QueryResult::CommandOk {
2719            affected: 0,
2720            modified_catalog: false,
2721        })
2722    }
2723
2724    fn exec_savepoint(&mut self, name: String) -> Result<QueryResult, EngineError> {
2725        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
2726        let state = self
2727            .tx_catalogs
2728            .get_mut(&tx_id)
2729            .ok_or(EngineError::NoActiveTransaction)?;
2730        // PG re-uses an existing savepoint name by dropping the older
2731        // entry and pushing a fresh one — match that behaviour so
2732        // application code can `SAVEPOINT sp; ...; SAVEPOINT sp` freely.
2733        state.savepoints.retain(|(n, _)| n != &name);
2734        let snapshot = state.catalog.clone();
2735        state.savepoints.push((name, snapshot));
2736        Ok(QueryResult::CommandOk {
2737            affected: 0,
2738            modified_catalog: false,
2739        })
2740    }
2741
2742    fn exec_rollback_to_savepoint(&mut self, name: &str) -> Result<QueryResult, EngineError> {
2743        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
2744        let state = self
2745            .tx_catalogs
2746            .get_mut(&tx_id)
2747            .ok_or(EngineError::NoActiveTransaction)?;
2748        let pos = state
2749            .savepoints
2750            .iter()
2751            .rposition(|(n, _)| n == name)
2752            .ok_or_else(|| {
2753                EngineError::Unsupported(alloc::format!("savepoint not found: {name}"))
2754            })?;
2755        // The savepoint stays on the stack (PG semantics): a later
2756        // `RELEASE` or further `ROLLBACK TO` is still allowed. Everything
2757        // after it is discarded.
2758        let snapshot = state.savepoints[pos].1.clone();
2759        state.savepoints.truncate(pos + 1);
2760        state.catalog = snapshot;
2761        Ok(QueryResult::CommandOk {
2762            affected: 0,
2763            modified_catalog: false,
2764        })
2765    }
2766
2767    fn exec_release_savepoint(&mut self, name: &str) -> Result<QueryResult, EngineError> {
2768        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
2769        let state = self
2770            .tx_catalogs
2771            .get_mut(&tx_id)
2772            .ok_or(EngineError::NoActiveTransaction)?;
2773        let pos = state
2774            .savepoints
2775            .iter()
2776            .rposition(|(n, _)| n == name)
2777            .ok_or_else(|| {
2778                EngineError::Unsupported(alloc::format!("savepoint not found: {name}"))
2779            })?;
2780        // RELEASE keeps the work since the savepoint, just discards the
2781        // bookmark plus everything nested under it.
2782        state.savepoints.truncate(pos);
2783        Ok(QueryResult::CommandOk {
2784            affected: 0,
2785            modified_catalog: false,
2786        })
2787    }
2788
2789    /// v6.0.4 — synchronous `ALTER INDEX <name> REBUILD [WITH
2790    /// (encoding = …)]`. Walks every table in the active catalog
2791    /// looking for an index matching `stmt.name`, then delegates the
2792    /// rebuild (including any encoding switch) to
2793    /// `Table::rebuild_nsw_index`. The "live" non-blocking
2794    /// optimisation is v6.0.4.1 / v6.1.x territory.
2795    /// v6.7.2 — `ALTER TABLE t SET hot_tier_bytes = X`. Dispatch
2796    /// arm. Currently the only setting is `hot_tier_bytes`; later
2797    /// v6.7.x can extend `AlterTableTarget` without touching this
2798    /// arm structure.
2799    fn exec_alter_table(
2800        &mut self,
2801        s: spg_sql::ast::AlterTableStatement,
2802    ) -> Result<QueryResult, EngineError> {
2803        match s.target {
2804            spg_sql::ast::AlterTableTarget::SetHotTierBytes(n) => {
2805                let table = self.active_catalog_mut().get_mut(&s.name).ok_or_else(|| {
2806                    EngineError::Storage(StorageError::TableNotFound {
2807                        name: s.name.clone(),
2808                    })
2809                })?;
2810                table.schema_mut().hot_tier_bytes = Some(n);
2811            }
2812            spg_sql::ast::AlterTableTarget::AddForeignKey(fk) => {
2813                // v7.6.8 — resolve FK against the live catalog first
2814                // (validates parent table, columns, indices). Then
2815                // verify every existing row in the child table
2816                // satisfies the new constraint. Then install it.
2817                let cols_snapshot = self
2818                    .active_catalog()
2819                    .get(&s.name)
2820                    .ok_or_else(|| {
2821                        EngineError::Storage(StorageError::TableNotFound {
2822                            name: s.name.clone(),
2823                        })
2824                    })?
2825                    .schema()
2826                    .columns
2827                    .clone();
2828                let storage_fk =
2829                    resolve_foreign_key(&s.name, &cols_snapshot, fk, self.active_catalog())?;
2830                // Verify existing rows. Treat them as a virtual
2831                // INSERT batch — reusing the v7.6.2 enforce helper.
2832                let existing_rows: Vec<Vec<Value>> = self
2833                    .active_catalog()
2834                    .get(&s.name)
2835                    .expect("checked above")
2836                    .rows()
2837                    .iter()
2838                    .map(|r| r.values.clone())
2839                    .collect();
2840                enforce_fk_inserts(
2841                    self.active_catalog(),
2842                    &s.name,
2843                    core::slice::from_ref(&storage_fk),
2844                    &existing_rows,
2845                )?;
2846                // Reject duplicate constraint name.
2847                let table = self
2848                    .active_catalog_mut()
2849                    .get_mut(&s.name)
2850                    .expect("checked above");
2851                if let Some(name) = &storage_fk.name
2852                    && table
2853                        .schema()
2854                        .foreign_keys
2855                        .iter()
2856                        .any(|f| f.name.as_ref() == Some(name))
2857                {
2858                    return Err(EngineError::Unsupported(alloc::format!(
2859                        "ALTER TABLE ADD CONSTRAINT: a constraint named {name:?} already exists"
2860                    )));
2861                }
2862                table.schema_mut().foreign_keys.push(storage_fk);
2863            }
2864            spg_sql::ast::AlterTableTarget::DropForeignKey(name) => {
2865                let table = self.active_catalog_mut().get_mut(&s.name).ok_or_else(|| {
2866                    EngineError::Storage(StorageError::TableNotFound {
2867                        name: s.name.clone(),
2868                    })
2869                })?;
2870                let fks = &mut table.schema_mut().foreign_keys;
2871                let before = fks.len();
2872                fks.retain(|f| f.name.as_ref() != Some(&name));
2873                if fks.len() == before {
2874                    return Err(EngineError::Unsupported(alloc::format!(
2875                        "ALTER TABLE DROP CONSTRAINT: no FK named {name:?} on {:?}",
2876                        s.name
2877                    )));
2878                }
2879            }
2880        }
2881        Ok(QueryResult::CommandOk {
2882            affected: 0,
2883            modified_catalog: !self.in_transaction(),
2884        })
2885    }
2886
2887    fn exec_alter_index(
2888        &mut self,
2889        stmt: spg_sql::ast::AlterIndexStatement,
2890    ) -> Result<QueryResult, EngineError> {
2891        // Translate the optional SQL-side encoding choice into the
2892        // storage-side enum; the same SqlVecEncoding -> VecEncoding
2893        // bridge `column_type_to_data_type` uses.
2894        let spg_sql::ast::AlterIndexStatement {
2895            name: idx_name,
2896            target,
2897        } = stmt;
2898        let spg_sql::ast::AlterIndexTarget::Rebuild { encoding } = target;
2899        let target = encoding.map(|e| match e {
2900            SqlVecEncoding::F32 => VecEncoding::F32,
2901            SqlVecEncoding::Sq8 => VecEncoding::Sq8,
2902            SqlVecEncoding::F16 => VecEncoding::F16,
2903        });
2904        // Linear scan: index names are globally unique within a
2905        // catalog (enforced by add_nsw_index_inner) so the first
2906        // match is the only one. Save the table name to avoid
2907        // borrowing while we then take a mut borrow.
2908        let table_name = {
2909            let cat = self.active_catalog();
2910            let mut found: Option<String> = None;
2911            for tname in cat.table_names() {
2912                if let Some(t) = cat.get(&tname)
2913                    && t.indices().iter().any(|i| i.name == idx_name)
2914                {
2915                    found = Some(tname);
2916                    break;
2917                }
2918            }
2919            found.ok_or_else(|| {
2920                EngineError::Storage(StorageError::IndexNotFound {
2921                    name: idx_name.clone(),
2922                })
2923            })?
2924        };
2925        let table = self
2926            .active_catalog_mut()
2927            .get_mut(&table_name)
2928            .expect("table found above");
2929        table.rebuild_nsw_index(&idx_name, target)?;
2930        // v6.3.1 — ALTER INDEX REBUILD potentially with new encoding
2931        // changes cost characteristics; evict any cached plans.
2932        self.plan_cache.evict_referencing(&table_name);
2933        Ok(QueryResult::CommandOk {
2934            affected: 0,
2935            modified_catalog: !self.in_transaction(),
2936        })
2937    }
2938
2939    fn exec_create_index(
2940        &mut self,
2941        stmt: CreateIndexStatement,
2942    ) -> Result<QueryResult, EngineError> {
2943        let table = self
2944            .active_catalog_mut()
2945            .get_mut(&stmt.table)
2946            .ok_or_else(|| {
2947                EngineError::Storage(StorageError::TableNotFound {
2948                    name: stmt.table.clone(),
2949                })
2950            })?;
2951        // `IF NOT EXISTS` reduces DuplicateIndex to a no-op CommandOk.
2952        if stmt.if_not_exists && table.indices().iter().any(|i| i.name == stmt.name) {
2953            return Ok(QueryResult::CommandOk {
2954                affected: 0,
2955                modified_catalog: false,
2956            });
2957        }
2958        // v7.9.14 — multi-column index parses through; engine
2959        // builds a single-column BTree on the leading column only.
2960        // The extras live on the AST so spg-server's dispatcher
2961        // can emit a PG-wire NoticeResponse / log line. Composite
2962        // BTree keys land in v7.10.
2963        let _ = &stmt.extra_columns; // intentional drop on engine side
2964        let table_name = stmt.table.clone();
2965        // v6.8.0 — resolve INCLUDE column names to positions. Done
2966        // before `add_index` so a typo error surfaces before any
2967        // catalog mutation lands.
2968        let included_positions: Vec<usize> = if stmt.included_columns.is_empty() {
2969            Vec::new()
2970        } else {
2971            let schema = table.schema();
2972            stmt.included_columns
2973                .iter()
2974                .map(|c| {
2975                    schema.column_position(c).ok_or_else(|| {
2976                        EngineError::Storage(StorageError::ColumnNotFound { column: c.clone() })
2977                    })
2978                })
2979                .collect::<Result<Vec<_>, _>>()?
2980        };
2981        match stmt.method {
2982            IndexMethod::BTree => table.add_index(stmt.name.clone(), &stmt.column)?,
2983            IndexMethod::Hnsw => {
2984                if !included_positions.is_empty() {
2985                    return Err(EngineError::Unsupported(
2986                        "INCLUDE columns are not supported on HNSW indexes".into(),
2987                    ));
2988                }
2989                table.add_nsw_index(stmt.name.clone(), &stmt.column, spg_storage::NSW_DEFAULT_M)?;
2990            }
2991            // v6.7.1 — BRIN. Pure metadata; no in-memory data.
2992            IndexMethod::Brin => {
2993                if !included_positions.is_empty() {
2994                    return Err(EngineError::Unsupported(
2995                        "INCLUDE columns are not supported on BRIN indexes".into(),
2996                    ));
2997                }
2998                table.add_brin_index(stmt.name.clone(), &stmt.column)?;
2999            }
3000        }
3001        if !included_positions.is_empty()
3002            && let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name)
3003        {
3004            idx.included_columns = included_positions;
3005        }
3006        // v6.8.1 — persist partial-index predicate. Stored as the
3007        // expression's Display form so the catalog snapshot stays
3008        // pure (storage has no spg-sql dependency). The runtime
3009        // maintenance path treats partial indexes identically to
3010        // full indexes for v6.8.1 (over-maintenance is safe; the
3011        // planner-side "use partial when query WHERE implies the
3012        // predicate" pass is STABILITY carve-out).
3013        if let Some(pred_expr) = &stmt.partial_predicate {
3014            let canonical = pred_expr.to_string();
3015            if matches!(stmt.method, IndexMethod::Hnsw | IndexMethod::Brin) {
3016                return Err(EngineError::Unsupported(
3017                    "WHERE predicates are not supported on HNSW or BRIN indexes".into(),
3018                ));
3019            }
3020            if let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name) {
3021                idx.partial_predicate = Some(canonical);
3022            }
3023        }
3024        // v6.8.2 — persist expression index key. Same Display-form
3025        // storage; the runtime maintenance pass evaluates each
3026        // row's expression to derive the index key, but for v6.8.2
3027        // the engine falls through to the bare-column-reference
3028        // path and the expression is preserved for format-layer
3029        // round-trip + future planner work. Carved-out in
3030        // STABILITY § "Out of v6.8".
3031        if let Some(key_expr) = &stmt.expression {
3032            if matches!(stmt.method, IndexMethod::Hnsw | IndexMethod::Brin) {
3033                return Err(EngineError::Unsupported(
3034                    "Expression keys are not supported on HNSW or BRIN indexes".into(),
3035                ));
3036            }
3037            let canonical = key_expr.to_string();
3038            if let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name) {
3039                idx.expression = Some(canonical);
3040            }
3041        }
3042        // v7.9.29 — persist `is_unique` flag on the storage Index.
3043        // Combined with `partial_predicate`, INSERT enforcement
3044        // checks that no other row whose predicate evaluates true
3045        // shares the same indexed key. Parser already rejected
3046        // `UNIQUE` on HNSW / BRIN, so plain BTree here.
3047        // For multi-column UNIQUE INDEX the extras matter (the
3048        // full tuple is the uniqueness key), so resolve them to
3049        // column positions and persist on the index too.
3050        if stmt.is_unique {
3051            let mut extra_positions: alloc::vec::Vec<usize> = alloc::vec::Vec::new();
3052            for col_name in &stmt.extra_columns {
3053                let pos = table
3054                    .schema()
3055                    .columns
3056                    .iter()
3057                    .position(|c| c.name.eq_ignore_ascii_case(col_name))
3058                    .ok_or_else(|| {
3059                        EngineError::Unsupported(alloc::format!(
3060                            "UNIQUE INDEX {:?}: extra column {col_name:?} not in table {:?}",
3061                            stmt.name,
3062                            stmt.table
3063                        ))
3064                    })?;
3065                extra_positions.push(pos);
3066            }
3067            if let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name) {
3068                idx.is_unique = true;
3069                idx.extra_column_positions = extra_positions;
3070            }
3071            // At index-creation time, check the existing rows for
3072            // pre-existing duplicates that would have violated the
3073            // new constraint — otherwise CREATE UNIQUE INDEX would
3074            // silently leave duplicates in place.
3075            let snapshot_indices = table.indices().to_vec();
3076            let snapshot_rows: alloc::vec::Vec<spg_storage::Row> =
3077                table.rows().iter().cloned().collect();
3078            let snapshot_schema = table.schema().clone();
3079            let idx_ref = snapshot_indices
3080                .iter()
3081                .find(|i| i.name == stmt.name)
3082                .expect("just-added index");
3083            check_existing_unique_violation(idx_ref, &snapshot_schema, &snapshot_rows)?;
3084        }
3085        // v6.3.1 — adding an index can change the optimal plan for
3086        // any cached query that references this table.
3087        self.plan_cache.evict_referencing(&table_name);
3088        Ok(QueryResult::CommandOk {
3089            affected: 0,
3090            modified_catalog: !self.in_transaction(),
3091        })
3092    }
3093
3094    fn exec_create_table(
3095        &mut self,
3096        stmt: CreateTableStatement,
3097    ) -> Result<QueryResult, EngineError> {
3098        if stmt.if_not_exists && self.active_catalog().get(&stmt.name).is_some() {
3099            return Ok(QueryResult::CommandOk {
3100                affected: 0,
3101                modified_catalog: false,
3102            });
3103        }
3104        let table_name = stmt.name.clone();
3105        // v7.9.13 — pluck the names of any columns marked
3106        // `PRIMARY KEY` inline so the post-create-table pass can
3107        // build an implicit BTree index. mailrs F1.
3108        let inline_pk_columns: Vec<String> = stmt
3109            .columns
3110            .iter()
3111            .filter(|c| c.is_primary_key)
3112            .map(|c| c.name.clone())
3113            .collect();
3114        // v7.9.19 — table-level constraints: PRIMARY KEY (a, b, ...)
3115        // and UNIQUE (a, b, ...). Each builds a BTree index on the
3116        // leading column (the existing single-column storage tier)
3117        // and registers a UniquenessConstraint on the schema for
3118        // INSERT-time enforcement of the full tuple. mailrs G1/G6.
3119        let cols = stmt
3120            .columns
3121            .into_iter()
3122            .map(column_def_to_schema)
3123            .collect::<Result<Vec<_>, _>>()?;
3124        // Composite NOT-NULL implication for PRIMARY KEY columns.
3125        let mut cols = cols;
3126        for tc in &stmt.table_constraints {
3127            if let spg_sql::ast::TableConstraint::PrimaryKey { columns, .. } = tc {
3128                for col_name in columns {
3129                    if let Some(col) = cols.iter_mut().find(|c| c.name == *col_name) {
3130                        col.nullable = false;
3131                    }
3132                }
3133            }
3134        }
3135        // v7.6.1 — resolve every FK in the statement against the
3136        // already-known catalog. Validates: parent table exists,
3137        // parent column names exist, arity matches, parent columns
3138        // have a PK / UNIQUE index. Self-referencing FKs (parent
3139        // table == this table) resolve against the column list we
3140        // just built — they don't need the catalog yet.
3141        let mut fks: Vec<spg_storage::ForeignKeyConstraint> =
3142            Vec::with_capacity(stmt.foreign_keys.len());
3143        for fk in stmt.foreign_keys {
3144            fks.push(resolve_foreign_key(
3145                &table_name,
3146                &cols,
3147                fk,
3148                self.active_catalog(),
3149            )?);
3150        }
3151        let mut schema = TableSchema::new(table_name.clone(), cols);
3152        schema.foreign_keys = fks;
3153        // v7.9.19 — translate AST table_constraints to storage
3154        // UniquenessConstraints (column name → position) so the
3155        // INSERT enforcement helper sees positions directly.
3156        let mut uc_storage: Vec<spg_storage::UniquenessConstraint> = Vec::new();
3157        for tc in &stmt.table_constraints {
3158            let (is_pk, names) = match tc {
3159                spg_sql::ast::TableConstraint::PrimaryKey { columns, .. } => {
3160                    (true, columns.clone())
3161                }
3162                spg_sql::ast::TableConstraint::Unique { columns, .. } => (false, columns.clone()),
3163            };
3164            let mut positions = Vec::with_capacity(names.len());
3165            for n in &names {
3166                let pos = schema
3167                    .columns
3168                    .iter()
3169                    .position(|c| c.name == *n)
3170                    .ok_or_else(|| {
3171                        EngineError::Unsupported(alloc::format!(
3172                            "table constraint references unknown column {n:?}"
3173                        ))
3174                    })?;
3175                positions.push(pos);
3176            }
3177            uc_storage.push(spg_storage::UniquenessConstraint {
3178                is_primary_key: is_pk,
3179                columns: positions,
3180            });
3181        }
3182        schema.uniqueness_constraints = uc_storage.clone();
3183        self.active_catalog_mut().create_table(schema)?;
3184        // v7.9.13 — implicit BTree per inline PK column +
3185        // v7.9.19 — implicit BTree on the leading column of every
3186        // table-level PRIMARY KEY / UNIQUE constraint.
3187        let table = self
3188            .active_catalog_mut()
3189            .get_mut(&table_name)
3190            .expect("just created");
3191        for (i, col_name) in inline_pk_columns.iter().enumerate() {
3192            let idx_name = if inline_pk_columns.len() == 1 {
3193                alloc::format!("{table_name}_pkey")
3194            } else {
3195                alloc::format!("{table_name}_pkey_{i}")
3196            };
3197            if let Err(e) = table.add_index(idx_name, col_name) {
3198                return Err(EngineError::Storage(e));
3199            }
3200        }
3201        for (i, tc) in stmt.table_constraints.iter().enumerate() {
3202            let (is_pk, names) = match tc {
3203                spg_sql::ast::TableConstraint::PrimaryKey { columns, .. } => (true, columns),
3204                spg_sql::ast::TableConstraint::Unique { columns, .. } => (false, columns),
3205            };
3206            let leading = &names[0];
3207            // Skip if a same-column BTree already exists (e.g.
3208            // inline PK on the leading column).
3209            let already = table.indices().iter().any(|idx| {
3210                matches!(idx.kind, spg_storage::IndexKind::BTree(_))
3211                    && table.schema().columns[idx.column_position].name == *leading
3212            });
3213            if already {
3214                continue;
3215            }
3216            let suffix = if is_pk { "pkey" } else { "key" };
3217            let idx_name = if names.len() == 1 {
3218                alloc::format!("{table_name}_{leading}_{suffix}")
3219            } else {
3220                alloc::format!("{table_name}_{leading}_{suffix}_{i}")
3221            };
3222            if let Err(e) = table.add_index(idx_name, leading) {
3223                return Err(EngineError::Storage(e));
3224            }
3225        }
3226        Ok(QueryResult::CommandOk {
3227            affected: 0,
3228            modified_catalog: !self.in_transaction(),
3229        })
3230    }
3231
3232    fn exec_insert(&mut self, stmt: InsertStatement) -> Result<QueryResult, EngineError> {
3233        // v7.9.21 — snapshot the clock fn pointer before the mut
3234        // borrow on the catalog opens; runtime DEFAULT eval needs
3235        // it inside the row hot loop.
3236        let clock = self.clock;
3237        let table = self
3238            .active_catalog_mut()
3239            .get_mut(&stmt.table)
3240            .ok_or_else(|| {
3241                EngineError::Storage(StorageError::TableNotFound {
3242                    name: stmt.table.clone(),
3243                })
3244            })?;
3245        // v3.1.5: clone the columns vector only (not the whole
3246        // TableSchema — saves one String alloc for the table name).
3247        // We need an owned snapshot because we'll call `table.insert`
3248        // (mutable borrow on `table`) inside the row loop while
3249        // reading schema fields.
3250        let column_meta: Vec<ColumnSchema> = table.schema().columns.clone();
3251        let schema_cols_len = column_meta.len();
3252        // Build a permutation `tuple_pos[c] = Some(j)` meaning schema
3253        // column `c` is filled from the `j`-th tuple slot; `None` means
3254        // "fill with NULL". Validated once and reused for every row.
3255        let tuple_pos: Option<Vec<Option<usize>>> = match &stmt.columns {
3256            None => None, // 1-1 mapping, fast path
3257            Some(cols) => {
3258                let mut map = alloc::vec![None; schema_cols_len];
3259                for (j, name) in cols.iter().enumerate() {
3260                    let idx = column_meta
3261                        .iter()
3262                        .position(|c| c.name == *name)
3263                        .ok_or_else(|| {
3264                            EngineError::Eval(EvalError::ColumnNotFound { name: name.clone() })
3265                        })?;
3266                    if map[idx].is_some() {
3267                        return Err(EngineError::Storage(StorageError::ArityMismatch {
3268                            expected: schema_cols_len,
3269                            actual: cols.len(),
3270                        }));
3271                    }
3272                    map[idx] = Some(j);
3273                }
3274                // Omitted columns must either be nullable, carry a
3275                // DEFAULT, or be AUTO_INCREMENT. Catch NOT NULL
3276                // omissions up front so the WAL stays clean.
3277                for (i, col) in column_meta.iter().enumerate() {
3278                    if map[i].is_none()
3279                        && !col.nullable
3280                        && col.default.is_none()
3281                        && col.runtime_default.is_none()
3282                        && !col.auto_increment
3283                    {
3284                        return Err(EngineError::Storage(StorageError::NullInNotNull {
3285                            column: col.name.clone(),
3286                        }));
3287                    }
3288                }
3289                Some(map)
3290            }
3291        };
3292        let expected_tuple_len = stmt.columns.as_ref().map_or(schema_cols_len, Vec::len);
3293        // v7.6.2 — snapshot this table's FK list before the
3294        // mutable-borrow window so we can run parent lookups
3295        // against the immutable catalog after parsing. Empty vec is
3296        // the no-FK fast path; clone cost is O(fks * arity) which
3297        // is < 100 ns for typical schemas.
3298        let fks = table.schema().foreign_keys.clone();
3299        let mut affected = 0usize;
3300        // Stage 1 — parse + AUTO_INC + coerce all rows under the
3301        // single mutable borrow.
3302        let mut all_values: Vec<Vec<Value>> = Vec::with_capacity(stmt.rows.len());
3303        for tuple in stmt.rows {
3304            if tuple.len() != expected_tuple_len {
3305                return Err(EngineError::Storage(StorageError::ArityMismatch {
3306                    expected: expected_tuple_len,
3307                    actual: tuple.len(),
3308                }));
3309            }
3310            // Fast path: no column-list permutation → tuple slot j
3311            // maps to schema column j. We can zip schema with tuple
3312            // and skip the `raw_tuple` staging allocation entirely.
3313            let values: Vec<Value> = if let Some(map) = &tuple_pos {
3314                // Permuted path: still need raw_tuple to index by `map[i]`.
3315                let raw_tuple: Vec<Value> = tuple
3316                    .into_iter()
3317                    .map(literal_expr_to_value)
3318                    .collect::<Result<_, _>>()?;
3319                let mut out = Vec::with_capacity(schema_cols_len);
3320                for (i, col) in column_meta.iter().enumerate() {
3321                    let mut raw = match map[i] {
3322                        Some(j) => raw_tuple[j].clone(),
3323                        None => resolve_column_default_free(col, clock)?,
3324                    };
3325                    if col.auto_increment && raw.is_null() {
3326                        let next = table.next_auto_value(i).ok_or_else(|| {
3327                            EngineError::Unsupported(alloc::format!(
3328                                "AUTO_INCREMENT applies to integer columns only (column `{}`)",
3329                                col.name
3330                            ))
3331                        })?;
3332                        raw = Value::BigInt(next);
3333                    }
3334                    out.push(coerce_value(raw, col.ty, &col.name, i)?);
3335                }
3336                out
3337            } else {
3338                // 1-1 mapping fast path: single Vec alloc, no raw_tuple.
3339                let mut out = Vec::with_capacity(schema_cols_len);
3340                for (i, (col, expr)) in column_meta.iter().zip(tuple).enumerate() {
3341                    let mut raw = literal_expr_to_value(expr)?;
3342                    if col.auto_increment && raw.is_null() {
3343                        let next = table.next_auto_value(i).ok_or_else(|| {
3344                            EngineError::Unsupported(alloc::format!(
3345                                "AUTO_INCREMENT applies to integer columns only (column `{}`)",
3346                                col.name
3347                            ))
3348                        })?;
3349                        raw = Value::BigInt(next);
3350                    }
3351                    out.push(coerce_value(raw, col.ty, &col.name, i)?);
3352                }
3353                out
3354            };
3355            all_values.push(values);
3356        }
3357        // Stage 2 — FK enforcement on the immutable catalog.
3358        // Non-lexical lifetimes release the mutable borrow on
3359        // `table` here since stage 1 was the last use. The
3360        // parent-table lookup runs before any row is committed.
3361        let uniqueness = table.schema().uniqueness_constraints.clone();
3362        let _ = table;
3363        if !fks.is_empty() {
3364            enforce_fk_inserts(self.active_catalog(), &stmt.table, &fks, &all_values)?;
3365        }
3366        // v7.9.19 — composite UNIQUE / PRIMARY KEY enforcement.
3367        enforce_uniqueness_inserts(self.active_catalog(), &stmt.table, &uniqueness, &all_values)?;
3368        // v7.9.29 — CREATE UNIQUE INDEX [WHERE pred] enforcement.
3369        // Independent of table-level UniquenessConstraint (which
3370        // can't carry a predicate). Walks the table's indexes;
3371        // for each `is_unique` index, only rows whose
3372        // partial_predicate evaluates truthy are checked for
3373        // collision. mailrs K1.
3374        enforce_unique_index_inserts(self.active_catalog(), &stmt.table, &all_values)?;
3375        // v7.9.8 / v7.9.9 — ON CONFLICT handling.
3376        //   - `DO NOTHING` filters `all_values` to non-conflicting
3377        //     rows + drops within-batch duplicates.
3378        //   - `DO UPDATE SET …` ALSO filters, but for each
3379        //     conflicting row it queues an UPDATE on the existing
3380        //     row using the incoming row's values as `EXCLUDED.*`.
3381        let mut pending_updates: Vec<(usize, Vec<Value>)> = Vec::new();
3382        let mut skipped_count = 0usize;
3383        if let Some(clause) = &stmt.on_conflict {
3384            let conflict_cols = resolve_on_conflict_columns(
3385                self.active_catalog(),
3386                &stmt.table,
3387                clause.target_columns.as_slice(),
3388            )?;
3389            let mut kept: Vec<Vec<Value>> = Vec::with_capacity(all_values.len());
3390            let mut seen_keys: Vec<Vec<Value>> = Vec::new();
3391            for values in all_values {
3392                let key_tuple: Vec<&Value> = conflict_cols.iter().map(|&c| &values[c]).collect();
3393                // SQL spec: NULL in any conflict column means "no
3394                // conflict possible" (NULL ≠ NULL for uniqueness).
3395                let has_null_key = key_tuple.iter().any(|v| matches!(v, Value::Null));
3396                let collides_with_table = !has_null_key
3397                    && on_conflict_keys_exist(
3398                        self.active_catalog(),
3399                        &stmt.table,
3400                        &conflict_cols,
3401                        &key_tuple,
3402                    );
3403                let key_tuple_owned: Vec<Value> = key_tuple.iter().map(|v| (*v).clone()).collect();
3404                let collides_with_batch =
3405                    !has_null_key && seen_keys.iter().any(|k| k == &key_tuple_owned);
3406                let collides = collides_with_table || collides_with_batch;
3407                match (&clause.action, collides) {
3408                    (_, false) => {
3409                        seen_keys.push(key_tuple_owned);
3410                        kept.push(values);
3411                    }
3412                    (spg_sql::ast::OnConflictAction::Nothing, true) => {
3413                        skipped_count += 1;
3414                    }
3415                    (
3416                        spg_sql::ast::OnConflictAction::Update {
3417                            assignments,
3418                            where_,
3419                        },
3420                        true,
3421                    ) => {
3422                        if !collides_with_table {
3423                            skipped_count += 1;
3424                            continue;
3425                        }
3426                        let target_pos = lookup_row_position_by_keys(
3427                            self.active_catalog(),
3428                            &stmt.table,
3429                            &conflict_cols,
3430                            &key_tuple,
3431                        )
3432                        .ok_or_else(|| {
3433                            EngineError::Unsupported(
3434                                "ON CONFLICT DO UPDATE: conflict detected but row \
3435                                 position could not be resolved (cold-tier row?)"
3436                                    .into(),
3437                            )
3438                        })?;
3439                        let updated = apply_on_conflict_assignments(
3440                            self.active_catalog(),
3441                            &stmt.table,
3442                            target_pos,
3443                            &values,
3444                            assignments,
3445                            where_.as_ref(),
3446                        )?;
3447                        if let Some(new_row) = updated {
3448                            pending_updates.push((target_pos, new_row));
3449                        } else {
3450                            skipped_count += 1;
3451                        }
3452                    }
3453                }
3454            }
3455            all_values = kept;
3456        }
3457        // Stage 3 — insert all rows under a fresh mutable borrow.
3458        let table = self
3459            .active_catalog_mut()
3460            .get_mut(&stmt.table)
3461            .ok_or_else(|| {
3462                EngineError::Storage(StorageError::TableNotFound {
3463                    name: stmt.table.clone(),
3464                })
3465            })?;
3466        // v7.9.4 — keep RETURNING projection rows separate per
3467        // INSERT and per UPDATE branch so DO UPDATE pushes the new
3468        // post-update state, not the incoming-only values.
3469        let mut returning_rows: Vec<Vec<Value>> = Vec::new();
3470        for values in all_values {
3471            if stmt.returning.is_some() {
3472                returning_rows.push(values.clone());
3473            }
3474            table.insert(Row::new(values))?;
3475            affected += 1;
3476        }
3477        // v7.9.9 — apply ON CONFLICT DO UPDATE rewrites collected
3478        // in the conflict-resolution pass. update_row handles
3479        // index maintenance + body re-encoding.
3480        for (pos, new_row) in pending_updates {
3481            if stmt.returning.is_some() {
3482                returning_rows.push(new_row.clone());
3483            }
3484            table.update_row(pos, new_row)?;
3485            affected += 1;
3486        }
3487        let _ = skipped_count;
3488        // v7.9.4/v7.9.9 — RETURNING streams the rows that ended
3489        // up in the table after this statement (insert or
3490        // post-update on conflict).
3491        if let Some(items) = &stmt.returning {
3492            let _ = table;
3493            return self.build_returning_rows(&stmt.table, items, returning_rows);
3494        }
3495        // v6.2.1 — auto-analyze: track per-table modified-row
3496        // counter so the background sweep can decide when to
3497        // re-ANALYZE. Cheap path on the autocommit-wrap hot loop
3498        // — one BTreeMap entry update per INSERT batch.
3499        if !self.in_transaction() && affected > 0 {
3500            self.statistics
3501                .record_modifications(&stmt.table, affected as u64);
3502        }
3503        Ok(QueryResult::CommandOk {
3504            affected,
3505            modified_catalog: !self.in_transaction(),
3506        })
3507    }
3508
3509    /// v4.5: SELECT with cooperative cancellation. The token is
3510    /// honoured between UNION peers and inside the bare-SELECT row
3511    /// loop; HNSW kNN graph walks and the aggregate executor don't
3512    /// honour it yet (deferred — those paths bound their work
3513    /// internally by `LIMIT k` and `GROUP BY` cardinality).
3514    /// v6.10.2 — cold-tier time-travel scan. Resolves the segment
3515    /// by id, decodes each row body against the table's current
3516    /// schema, applies the SELECT's projection + optional WHERE +
3517    /// optional LIMIT, returns a `Rows` result. JOINs / aggregates
3518    /// / ORDER BY are unsupported on this path (STABILITY carve-
3519    /// out); operators wanting them should restore the segment
3520    /// into a regular table first.
3521    fn exec_select_as_of_segment(
3522        &self,
3523        stmt: &SelectStatement,
3524        from: &spg_sql::ast::FromClause,
3525        segment_id: u32,
3526    ) -> Result<QueryResult, EngineError> {
3527        // v6.10.2 scope: no joins, no aggregates, no ORDER BY,
3528        // no GROUP BY / HAVING / UNION / OFFSET / DISTINCT.
3529        if !from.joins.is_empty()
3530            || stmt.group_by.is_some()
3531            || stmt.having.is_some()
3532            || !stmt.unions.is_empty()
3533            || !stmt.order_by.is_empty()
3534            || stmt.offset.is_some()
3535            || stmt.distinct
3536            || aggregate::uses_aggregate(stmt)
3537        {
3538            return Err(EngineError::Unsupported(
3539                "AS OF SEGMENT supports SELECT projection + WHERE + LIMIT only \
3540                 (joins / aggregates / ORDER BY are STABILITY § \"Out of v6.10\")"
3541                    .into(),
3542            ));
3543        }
3544        let table = self
3545            .active_catalog()
3546            .get(&from.primary.name)
3547            .ok_or_else(|| StorageError::TableNotFound {
3548                name: from.primary.name.clone(),
3549            })?;
3550        let schema = table.schema().clone();
3551        let schema_cols = &schema.columns;
3552        let alias = from
3553            .primary
3554            .alias
3555            .as_deref()
3556            .unwrap_or(from.primary.name.as_str());
3557        let ctx = EvalContext::new(schema_cols, Some(alias));
3558        let seg = self
3559            .active_catalog()
3560            .cold_segment(segment_id)
3561            .ok_or_else(|| {
3562                EngineError::Unsupported(alloc::format!(
3563                    "AS OF SEGMENT: cold segment {segment_id} not registered"
3564                ))
3565            })?;
3566        let mut out_rows: Vec<Row> = Vec::new();
3567        let mut limit_remaining: Option<usize> =
3568            stmt.limit_literal().and_then(|n| usize::try_from(n).ok());
3569        for (_key, body) in seg.scan() {
3570            let (row, _consumed) =
3571                spg_storage::decode_row_body_dense(&body, &schema).map_err(EngineError::Storage)?;
3572            if let Some(where_expr) = &stmt.where_ {
3573                let cond = self.eval_expr_simple(where_expr, &row, &ctx)?;
3574                if !matches!(cond, Value::Bool(true)) {
3575                    continue;
3576                }
3577            }
3578            // Projection.
3579            let projected = self.project_row_simple(&row, &stmt.items, schema_cols, alias)?;
3580            out_rows.push(projected);
3581            if let Some(rem) = limit_remaining.as_mut() {
3582                if *rem == 0 {
3583                    out_rows.pop();
3584                    break;
3585                }
3586                *rem -= 1;
3587            }
3588        }
3589        // Output column schema: derive from SELECT items.
3590        let columns = self.derive_output_columns(&stmt.items, schema_cols, alias);
3591        Ok(QueryResult::Rows {
3592            columns,
3593            rows: out_rows,
3594        })
3595    }
3596
3597    /// v6.10.2 — simple-path WHERE eval that doesn't go through
3598    /// the correlated-subquery / Memoize machinery. AS OF SEGMENT
3599    /// scan paths predicate against a snapshot frozen segment, no
3600    /// cross-row state.
3601    fn eval_expr_simple(
3602        &self,
3603        expr: &Expr,
3604        row: &Row,
3605        ctx: &EvalContext,
3606    ) -> Result<Value, EngineError> {
3607        let cancel = CancelToken::none();
3608        self.eval_expr_with_correlated(expr, row, ctx, cancel, None)
3609    }
3610
3611    /// v7.9.4 — INSERT / UPDATE / DELETE RETURNING projector.
3612    /// Given the table name, the user-supplied projection items,
3613    /// and the mutated rows (post-insert / post-update values, or
3614    /// pre-delete snapshot), build a `QueryResult::Rows` whose
3615    /// schema describes the projected columns. Mailrs migration
3616    /// blocker #1.
3617    fn build_returning_rows(
3618        &self,
3619        table_name: &str,
3620        items: &[SelectItem],
3621        mutated_rows: Vec<Vec<Value>>,
3622    ) -> Result<QueryResult, EngineError> {
3623        let table = self.active_catalog().get(table_name).ok_or_else(|| {
3624            EngineError::Storage(StorageError::TableNotFound {
3625                name: table_name.into(),
3626            })
3627        })?;
3628        let schema_cols = table.schema().columns.clone();
3629        let columns = self.derive_output_columns(items, &schema_cols, table_name);
3630        let mut out_rows: Vec<Row> = Vec::with_capacity(mutated_rows.len());
3631        for values in mutated_rows {
3632            let row = Row::new(values);
3633            let projected = self.project_row_simple(&row, items, &schema_cols, table_name)?;
3634            out_rows.push(projected);
3635        }
3636        Ok(QueryResult::Rows {
3637            columns,
3638            rows: out_rows,
3639        })
3640    }
3641
3642    /// v6.10.2 — projection for AS OF SEGMENT. Resolves
3643    /// `SelectItem::Wildcard` to all schema columns and
3644    /// `SelectItem::Expr` via the regular eval path.
3645    fn project_row_simple(
3646        &self,
3647        row: &Row,
3648        items: &[SelectItem],
3649        schema_cols: &[ColumnSchema],
3650        alias: &str,
3651    ) -> Result<Row, EngineError> {
3652        let ctx = EvalContext::new(schema_cols, Some(alias));
3653        let cancel = CancelToken::none();
3654        let mut out_vals = Vec::new();
3655        for item in items {
3656            match item {
3657                SelectItem::Wildcard => {
3658                    out_vals.extend(row.values.iter().cloned());
3659                }
3660                SelectItem::Expr { expr, .. } => {
3661                    let v = self.eval_expr_with_correlated(expr, row, &ctx, cancel, None)?;
3662                    out_vals.push(v);
3663                }
3664            }
3665        }
3666        Ok(Row::new(out_vals))
3667    }
3668
3669    /// v6.10.2 — derive the output `ColumnSchema` list for an
3670    /// AS OF SEGMENT projection. Wildcards take the full schema;
3671    /// expressions take the alias if present or a synthetic
3672    /// `?column?` (PG convention) otherwise.
3673    fn derive_output_columns(
3674        &self,
3675        items: &[SelectItem],
3676        schema_cols: &[ColumnSchema],
3677        _alias: &str,
3678    ) -> Vec<ColumnSchema> {
3679        let mut out = Vec::new();
3680        for item in items {
3681            match item {
3682                SelectItem::Wildcard => {
3683                    out.extend(schema_cols.iter().cloned());
3684                }
3685                SelectItem::Expr { alias, .. } => {
3686                    let name = alias.clone().unwrap_or_else(|| "?column?".to_string());
3687                    // Default to Text; the caller's row values
3688                    // carry the actual type. v6.10.2 scope.
3689                    out.push(ColumnSchema::new(name, DataType::Text, true));
3690                }
3691            }
3692        }
3693        out
3694    }
3695
3696    fn exec_select_cancel(
3697        &self,
3698        stmt: &SelectStatement,
3699        cancel: CancelToken<'_>,
3700    ) -> Result<QueryResult, EngineError> {
3701        cancel.check()?;
3702        // v6.10.2 — cold-tier time-travel short-circuit. When the
3703        // primary TableRef carries `AS OF SEGMENT '<id>'`, run a
3704        // dedicated cold-segment scan instead of the regular
3705        // hot+index path. The scope is intentionally narrow for
3706        // v6.10.2 — bare `SELECT * FROM <t> AS OF SEGMENT 'id'`,
3707        // optionally with a single-column-equality WHERE. JOINs /
3708        // aggregates / ORDER BY / subqueries on top of a time-
3709        // travelled scan are STABILITY § "Out of v6.10".
3710        if let Some(from) = &stmt.from
3711            && let Some(seg_id) = from.primary.as_of_segment
3712        {
3713            return self.exec_select_as_of_segment(stmt, from, seg_id);
3714        }
3715        // v6.2.0 / v6.5.0 — virtual-table short-circuits. Detected
3716        // pre-CTE because they don't read from the catalog and
3717        // shouldn't participate in regular FROM resolution.
3718        if let Some(from) = &stmt.from
3719            && from.joins.is_empty()
3720            && stmt.where_.is_none()
3721            && stmt.group_by.is_none()
3722            && stmt.having.is_none()
3723            && stmt.unions.is_empty()
3724            && stmt.order_by.is_empty()
3725            && stmt.limit.is_none()
3726            && stmt.offset.is_none()
3727            && !stmt.distinct
3728            && stmt.items.iter().all(|i| matches!(i, SelectItem::Wildcard))
3729        {
3730            let lower = from.primary.name.to_ascii_lowercase();
3731            match lower.as_str() {
3732                "spg_statistic" => return Ok(self.exec_spg_statistic()),
3733                // v6.5.0 — observability v2 virtual tables.
3734                "spg_stat_replication" => return Ok(self.exec_spg_stat_replication()),
3735                "spg_stat_segment" => return Ok(self.exec_spg_stat_segment()),
3736                "spg_stat_query" => return Ok(self.exec_spg_stat_query()),
3737                "spg_stat_activity" => return Ok(self.exec_spg_stat_activity()),
3738                "spg_audit_chain" => return Ok(self.exec_spg_audit_chain()),
3739                "spg_audit_verify" => return Ok(self.exec_spg_audit_verify()),
3740                "spg_table_ddl" => return Ok(self.exec_spg_table_ddl()),
3741                "spg_role_ddl" => return Ok(self.exec_spg_role_ddl()),
3742                "spg_database_ddl" => return Ok(self.exec_spg_database_ddl()),
3743                _ => {}
3744            }
3745        }
3746        // v4.11: CTEs materialise into a temporary enriched catalog
3747        // *before* anything else — the body SELECT can then refer
3748        // to CTE names via the regular FROM-clause resolution.
3749        // Uncorrelated only: each CTE body runs once against the
3750        // current catalog, not against later CTEs' results (left-
3751        // to-right materialisation would relax this, but we keep
3752        // it simple for v4.11 MVP).
3753        if !stmt.ctes.is_empty() {
3754            return self.exec_with_ctes(stmt, cancel);
3755        }
3756        // v4.10: subqueries (uncorrelated) are resolved here, before
3757        // the executor sees the row loop. We clone the statement so
3758        // we can mutate without disturbing the caller's AST — most
3759        // queries pass through with no subquery nodes and the clone
3760        // is cheap; with subqueries the materialisation cost
3761        // dominates anyway.
3762        let mut stmt_owned;
3763        let stmt_ref: &SelectStatement = if expr_tree_has_subquery(stmt) {
3764            stmt_owned = stmt.clone();
3765            self.resolve_select_subqueries(&mut stmt_owned, cancel)?;
3766            &stmt_owned
3767        } else {
3768            stmt
3769        };
3770        if stmt_ref.unions.is_empty() {
3771            return self.exec_bare_select_cancel(stmt_ref, cancel);
3772        }
3773        // UNION path: clone-strip the head into a bare block (its own
3774        // DISTINCT and any inner ORDER BY are dropped by parser rule —
3775        // the wrapper SelectStatement carries them), execute, then chain
3776        // peers with left-associative dedup semantics.
3777        let mut head = stmt_ref.clone();
3778        head.unions = Vec::new();
3779        head.order_by = Vec::new();
3780        head.limit = None;
3781        let QueryResult::Rows { columns, mut rows } =
3782            self.exec_bare_select_cancel(&head, cancel)?
3783        else {
3784            unreachable!("bare SELECT cannot return CommandOk")
3785        };
3786        for (kind, peer) in &stmt_ref.unions {
3787            let QueryResult::Rows {
3788                columns: peer_cols,
3789                rows: peer_rows,
3790            } = self.exec_bare_select_cancel(peer, cancel)?
3791            else {
3792                unreachable!("bare SELECT cannot return CommandOk")
3793            };
3794            if peer_cols.len() != columns.len() {
3795                return Err(EngineError::Unsupported(alloc::format!(
3796                    "UNION arity mismatch: head has {} columns, peer has {}",
3797                    columns.len(),
3798                    peer_cols.len()
3799                )));
3800            }
3801            rows.extend(peer_rows);
3802            if matches!(kind, UnionKind::Distinct) {
3803                rows = dedup_rows(rows);
3804            }
3805        }
3806        // ORDER BY at the top of a UNION applies to the combined result.
3807        // Eval against the projected schema (NOT the source table).
3808        if !stmt.order_by.is_empty() {
3809            let synth_ctx = EvalContext::new(&columns, None);
3810            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
3811            let mut tagged: Vec<(Vec<f64>, Row)> = Vec::with_capacity(rows.len());
3812            for r in rows {
3813                let keys = build_order_keys(&stmt.order_by, &r, &synth_ctx)?;
3814                tagged.push((keys, r));
3815            }
3816            sort_by_keys(&mut tagged, &descs);
3817            rows = tagged.into_iter().map(|(_, r)| r).collect();
3818        }
3819        apply_offset_and_limit(&mut rows, stmt.offset_literal(), stmt.limit_literal());
3820        Ok(QueryResult::Rows { columns, rows })
3821    }
3822
3823    #[allow(clippy::too_many_lines)]
3824    #[allow(clippy::too_many_lines)] // huge match — splitting fragments the planner
3825    /// v7.11.7 — execute `SELECT … FROM unnest(expr) [AS] alias …`.
3826    /// Synthesises a single-column virtual table whose column type
3827    /// is TEXT and whose rows are the array elements. Routes
3828    /// through the regular projection / WHERE / ORDER BY / LIMIT
3829    /// machinery so set-returning UNNEST composes naturally with
3830    /// the rest of the SELECT surface.
3831    fn exec_select_unnest(
3832        &self,
3833        stmt: &SelectStatement,
3834        primary: &TableRef,
3835        cancel: CancelToken<'_>,
3836    ) -> Result<QueryResult, EngineError> {
3837        let expr = primary
3838            .unnest_expr
3839            .as_deref()
3840            .expect("caller guards unnest_expr.is_some()");
3841        // Evaluate the array expression once. Empty schema / empty
3842        // row — uncorrelated UNNEST cannot reference outer columns.
3843        let empty_schema: alloc::vec::Vec<ColumnSchema> = alloc::vec::Vec::new();
3844        let ctx = EvalContext::new(&empty_schema, None);
3845        let dummy_row = Row::new(alloc::vec::Vec::new());
3846        // v7.11.13 — unnest dispatches per array element type so
3847        // INT[] / BIGINT[] surface their PG types in projection.
3848        let (elem_dtype, rows): (DataType, alloc::vec::Vec<Row>) =
3849            match eval::eval_expr(expr, &dummy_row, &ctx).map_err(EngineError::Eval)? {
3850                Value::Null => (DataType::Text, alloc::vec::Vec::new()),
3851                Value::TextArray(items) => {
3852                    let rows = items
3853                        .into_iter()
3854                        .map(|item| {
3855                            Row::new(alloc::vec![match item {
3856                                Some(s) => Value::Text(s),
3857                                None => Value::Null,
3858                            }])
3859                        })
3860                        .collect();
3861                    (DataType::Text, rows)
3862                }
3863                Value::IntArray(items) => {
3864                    let rows = items
3865                        .into_iter()
3866                        .map(|item| {
3867                            Row::new(alloc::vec![match item {
3868                                Some(n) => Value::Int(n),
3869                                None => Value::Null,
3870                            }])
3871                        })
3872                        .collect();
3873                    (DataType::Int, rows)
3874                }
3875                Value::BigIntArray(items) => {
3876                    let rows = items
3877                        .into_iter()
3878                        .map(|item| {
3879                            Row::new(alloc::vec![match item {
3880                                Some(n) => Value::BigInt(n),
3881                                None => Value::Null,
3882                            }])
3883                        })
3884                        .collect();
3885                    (DataType::BigInt, rows)
3886                }
3887                other => {
3888                    return Err(EngineError::Unsupported(alloc::format!(
3889                        "unnest() expects an array argument, got {:?}",
3890                        other.data_type()
3891                    )));
3892                }
3893            };
3894        let alias = primary
3895            .alias
3896            .clone()
3897            .unwrap_or_else(|| "unnest".to_string());
3898        let col_schema = ColumnSchema::new(alias.clone(), elem_dtype, true);
3899        let schema_cols = alloc::vec![col_schema.clone()];
3900        let scan_ctx = EvalContext::new(&schema_cols, Some(&alias));
3901        // Apply WHERE.
3902        let filtered: alloc::vec::Vec<Row> = if let Some(w) = &stmt.where_ {
3903            let mut out = alloc::vec::Vec::with_capacity(rows.len());
3904            for row in rows {
3905                cancel.check()?;
3906                let v = eval::eval_expr(w, &row, &scan_ctx).map_err(EngineError::Eval)?;
3907                if matches!(v, Value::Bool(true)) {
3908                    out.push(row);
3909                }
3910            }
3911            out
3912        } else {
3913            rows
3914        };
3915        // Projection.
3916        let projection = build_projection(&stmt.items, &schema_cols, &alias)?;
3917        let mut projected_rows: alloc::vec::Vec<Row> =
3918            alloc::vec::Vec::with_capacity(filtered.len());
3919        for row in &filtered {
3920            let mut vals = alloc::vec::Vec::with_capacity(projection.len());
3921            for p in &projection {
3922                vals.push(eval::eval_expr(&p.expr, row, &scan_ctx).map_err(EngineError::Eval)?);
3923            }
3924            projected_rows.push(Row::new(vals));
3925        }
3926        // ORDER BY / LIMIT — apply on the projected rows (cheap;
3927        // unnest result sets are small by design).
3928        let columns: alloc::vec::Vec<ColumnSchema> = projection
3929            .iter()
3930            .map(|p| ColumnSchema::new(p.output_name.clone(), p.ty, p.nullable))
3931            .collect();
3932        // Re-evaluate ORDER BY against the source schema (pre-projection
3933        // so col refs by name still resolve through `scan_ctx`).
3934        if !stmt.order_by.is_empty() {
3935            let mut indexed: alloc::vec::Vec<(usize, Vec<Value>)> = filtered
3936                .iter()
3937                .enumerate()
3938                .map(|(i, r)| -> Result<_, EngineError> {
3939                    let keys: Result<Vec<Value>, EngineError> = stmt
3940                        .order_by
3941                        .iter()
3942                        .map(|ob| {
3943                            eval::eval_expr(&ob.expr, r, &scan_ctx).map_err(EngineError::Eval)
3944                        })
3945                        .collect();
3946                    Ok((i, keys?))
3947                })
3948                .collect::<Result<_, _>>()?;
3949            indexed.sort_by(|a, b| {
3950                for (idx, (ka, kb)) in a.1.iter().zip(b.1.iter()).enumerate() {
3951                    let mut cmp = value_cmp(ka, kb);
3952                    if stmt.order_by[idx].desc {
3953                        cmp = cmp.reverse();
3954                    }
3955                    if cmp != core::cmp::Ordering::Equal {
3956                        return cmp;
3957                    }
3958                }
3959                core::cmp::Ordering::Equal
3960            });
3961            projected_rows = indexed
3962                .into_iter()
3963                .map(|(i, _)| projected_rows[i].clone())
3964                .collect();
3965        }
3966        // LIMIT / OFFSET — apply at the tail.
3967        if let Some(offset) = stmt.offset_literal() {
3968            let off = (offset as usize).min(projected_rows.len());
3969            projected_rows.drain(..off);
3970        }
3971        if let Some(limit) = stmt.limit_literal() {
3972            projected_rows.truncate(limit as usize);
3973        }
3974        Ok(QueryResult::Rows {
3975            columns,
3976            rows: projected_rows,
3977        })
3978    }
3979
3980    fn exec_bare_select_cancel(
3981        &self,
3982        stmt: &SelectStatement,
3983        cancel: CancelToken<'_>,
3984    ) -> Result<QueryResult, EngineError> {
3985        // v4.12: window-function path. When the projection contains
3986        // any `name(args) OVER (...)` we route to the dedicated
3987        // executor — partition + sort + per-row window value before
3988        // the regular projection.
3989        if select_has_window(stmt) {
3990            return self.exec_select_with_window(stmt, cancel);
3991        }
3992        // Constant SELECT (no FROM) — evaluate each item once against an
3993        // empty dummy row. Useful for `SELECT 1`, `SELECT coalesce(...)`,
3994        // `SELECT '7'::INT`. Column references will surface as
3995        // ColumnNotFound on eval since the schema is empty.
3996        let Some(from) = &stmt.from else {
3997            let empty_schema: Vec<ColumnSchema> = Vec::new();
3998            let ctx = EvalContext::new(&empty_schema, None);
3999            let projection = build_projection(&stmt.items, &empty_schema, "")?;
4000            let dummy_row = Row::new(Vec::new());
4001            let mut values = Vec::with_capacity(projection.len());
4002            for p in &projection {
4003                values.push(eval::eval_expr(&p.expr, &dummy_row, &ctx)?);
4004            }
4005            let columns: Vec<ColumnSchema> = projection
4006                .into_iter()
4007                .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
4008                .collect();
4009            return Ok(QueryResult::Rows {
4010                columns,
4011                rows: alloc::vec![Row::new(values)],
4012            });
4013        };
4014        // Multi-table FROM (one or more joined peers) goes through the
4015        // nested-loop join executor. Single-table FROM stays on the
4016        // existing scan + index-seek path.
4017        if !from.joins.is_empty() {
4018            return self.exec_joined_select(stmt, from);
4019        }
4020        // v7.11.7 — `FROM unnest(<expr>) [AS] <alias>`. Synthesise a
4021        // single-column table at SELECT entry by evaluating the
4022        // expression once against the empty row (UNNEST is
4023        // uncorrelated in v7.11; correlated / LATERAL unnest is a
4024        // v7.12 carve-out). Build a virtual `Table` in a heap-only
4025        // catalog, then route to the regular scan path.
4026        if from.primary.unnest_expr.is_some() {
4027            return self.exec_select_unnest(stmt, &from.primary, cancel);
4028        }
4029        let primary = &from.primary;
4030        let table = self.active_catalog().get(&primary.name).ok_or_else(|| {
4031            StorageError::TableNotFound {
4032                name: primary.name.clone(),
4033            }
4034        })?;
4035        let schema_cols = &table.schema().columns;
4036        // The qualifier accepted on column refs is the alias (if any) else the
4037        // bare table name.
4038        let alias = primary.alias.as_deref().unwrap_or(primary.name.as_str());
4039        let ctx = EvalContext::new(schema_cols, Some(alias));
4040
4041        // NSW kNN planner: `ORDER BY col <-> literal LIMIT k` with no
4042        // WHERE and an NSW index on `col` skips the full scan. The
4043        // walk returns rows already in ascending-distance order, so
4044        // ORDER BY / LIMIT are honoured implicitly.
4045        if let Some(nsw_rows) = try_nsw_knn(stmt, table, schema_cols, alias) {
4046            return materialise_in_order(stmt, table, schema_cols, alias, &nsw_rows);
4047        }
4048
4049        // Index seek: if WHERE is `col = literal` (or commuted) and the
4050        // referenced column has an index, dispatch each locator through
4051        // the catalog (hot tier → borrow, cold tier → page-read +
4052        // decode) and iterate just those rows. Otherwise fall back to a
4053        // full scan over the hot tier (cold-tier rows are only reached
4054        // via index seek in v5.1 — full table scans against cold-tier
4055        // data ship in v5.2 with the freezer's per-segment scan API).
4056        let indexed_rows: Option<Vec<Cow<'_, Row>>> = stmt
4057            .where_
4058            .as_ref()
4059            .and_then(|w| try_index_seek(w, schema_cols, self.active_catalog(), table, alias));
4060
4061        // Aggregate path: filter rows first, then hand off to the
4062        // aggregate executor which does its own projection + ORDER BY.
4063        if aggregate::uses_aggregate(stmt) {
4064            let mut filtered: Vec<&Row> = Vec::new();
4065            // v6.2.6 — Memoize: per-query LRU cache for correlated
4066            // scalar subqueries. Fresh per row-loop entry so each
4067            // SELECT execution gets an isolated cache.
4068            let mut memo = memoize::MemoizeCache::new();
4069            if let Some(rows) = &indexed_rows {
4070                for cow in rows {
4071                    let row = cow.as_ref();
4072                    if let Some(where_expr) = &stmt.where_ {
4073                        let cond = self.eval_expr_with_correlated(
4074                            where_expr,
4075                            row,
4076                            &ctx,
4077                            cancel,
4078                            Some(&mut memo),
4079                        )?;
4080                        if !matches!(cond, Value::Bool(true)) {
4081                            continue;
4082                        }
4083                    }
4084                    filtered.push(row);
4085                }
4086            } else {
4087                for i in 0..table.row_count() {
4088                    let row = &table.rows()[i];
4089                    if let Some(where_expr) = &stmt.where_ {
4090                        let cond = self.eval_expr_with_correlated(
4091                            where_expr,
4092                            row,
4093                            &ctx,
4094                            cancel,
4095                            Some(&mut memo),
4096                        )?;
4097                        if !matches!(cond, Value::Bool(true)) {
4098                            continue;
4099                        }
4100                    }
4101                    filtered.push(row);
4102                }
4103            }
4104            let mut agg = aggregate::run(stmt, &filtered, schema_cols, Some(alias))?;
4105            apply_offset_and_limit(&mut agg.rows, stmt.offset_literal(), stmt.limit_literal());
4106            return Ok(QueryResult::Rows {
4107                columns: agg.columns,
4108                rows: agg.rows,
4109            });
4110        }
4111
4112        let projection = build_projection(&stmt.items, schema_cols, alias)?;
4113
4114        // Materialise the filter pass into `(order_key, projected_row)`
4115        // tuples. The order key is `None` when there's no ORDER BY clause.
4116        let mut tagged: Vec<(Vec<f64>, Row)> = Vec::new();
4117        // v6.2.6 — Memoize per-row WHERE eval shares one cache.
4118        let mut memo = memoize::MemoizeCache::new();
4119        // Inline the per-row work in a closure so the indexed and full-
4120        // scan branches share the body.
4121        let mut process_row = |row: &Row, loop_idx: usize| -> Result<(), EngineError> {
4122            if loop_idx.is_multiple_of(256) {
4123                cancel.check()?;
4124            }
4125            if let Some(where_expr) = &stmt.where_ {
4126                let cond =
4127                    self.eval_expr_with_correlated(where_expr, row, &ctx, cancel, Some(&mut memo))?;
4128                if !matches!(cond, Value::Bool(true)) {
4129                    return Ok(());
4130                }
4131            }
4132            let mut values = Vec::with_capacity(projection.len());
4133            for p in &projection {
4134                values.push(eval::eval_expr(&p.expr, row, &ctx)?);
4135            }
4136            let order_keys = if stmt.order_by.is_empty() {
4137                Vec::new()
4138            } else {
4139                build_order_keys(&stmt.order_by, row, &ctx)?
4140            };
4141            tagged.push((order_keys, Row::new(values)));
4142            Ok(())
4143        };
4144        if let Some(rows) = &indexed_rows {
4145            for (loop_idx, cow) in rows.iter().enumerate() {
4146                process_row(cow.as_ref(), loop_idx)?;
4147            }
4148        } else {
4149            for i in 0..table.row_count() {
4150                process_row(&table.rows()[i], i)?;
4151            }
4152        }
4153
4154        if !stmt.order_by.is_empty() {
4155            // Partial-sort fast path: when LIMIT is small relative to
4156            // the row count, select_nth_unstable + sort just the
4157            // prefix is O(n + k log k) instead of O(n log n). DISTINCT
4158            // requires the full sort because de-dup happens after.
4159            let keep = if stmt.distinct {
4160                None
4161            } else {
4162                stmt.limit_literal()
4163                    .map(|l| l as usize + stmt.offset_literal().map_or(0, |o| o as usize))
4164            };
4165            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
4166            partial_sort_tagged(&mut tagged, keep, &descs);
4167        }
4168
4169        let mut output_rows: Vec<Row> = tagged.into_iter().map(|(_, r)| r).collect();
4170        if stmt.distinct {
4171            output_rows = dedup_rows(output_rows);
4172        }
4173        apply_offset_and_limit(
4174            &mut output_rows,
4175            stmt.offset_literal(),
4176            stmt.limit_literal(),
4177        );
4178
4179        let columns: Vec<ColumnSchema> = projection
4180            .into_iter()
4181            .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
4182            .collect();
4183
4184        Ok(QueryResult::Rows {
4185            columns,
4186            rows: output_rows,
4187        })
4188    }
4189
4190    /// Multi-table SELECT executor (one or more JOIN peers).
4191    ///
4192    /// v1.10 builds the joined row set up-front via nested-loop joins,
4193    /// then runs WHERE + projection + ORDER BY against the combined
4194    /// rows. No index seek. Aggregates and DISTINCT still work because
4195    /// the executor delegates projection through the same shared paths.
4196    #[allow(clippy::too_many_lines)]
4197    fn exec_joined_select(
4198        &self,
4199        stmt: &SelectStatement,
4200        from: &FromClause,
4201    ) -> Result<QueryResult, EngineError> {
4202        // Resolve every table reference up front so we surface
4203        // TableNotFound before we start the cartesian work.
4204        let primary_table = self
4205            .active_catalog()
4206            .get(&from.primary.name)
4207            .ok_or_else(|| StorageError::TableNotFound {
4208                name: from.primary.name.clone(),
4209            })?;
4210        let primary_alias = from
4211            .primary
4212            .alias
4213            .as_deref()
4214            .unwrap_or(from.primary.name.as_str())
4215            .to_string();
4216        let mut joined_tables: Vec<(&Table, String, JoinKind, Option<&Expr>)> = Vec::new();
4217        for j in &from.joins {
4218            let t = self.active_catalog().get(&j.table.name).ok_or_else(|| {
4219                StorageError::TableNotFound {
4220                    name: j.table.name.clone(),
4221                }
4222            })?;
4223            let a = j
4224                .table
4225                .alias
4226                .as_deref()
4227                .unwrap_or(j.table.name.as_str())
4228                .to_string();
4229            joined_tables.push((t, a, j.kind, j.on.as_ref()));
4230        }
4231
4232        // Build the combined schema: composite "alias.col" names so the
4233        // qualified-column resolver can find anything by exact match.
4234        let mut combined_schema: Vec<ColumnSchema> = Vec::new();
4235        for col in &primary_table.schema().columns {
4236            combined_schema.push(ColumnSchema::new(
4237                alloc::format!("{primary_alias}.{}", col.name),
4238                col.ty,
4239                col.nullable,
4240            ));
4241        }
4242        for (t, a, _, _) in &joined_tables {
4243            for col in &t.schema().columns {
4244                combined_schema.push(ColumnSchema::new(
4245                    alloc::format!("{a}.{}", col.name),
4246                    col.ty,
4247                    col.nullable,
4248                ));
4249            }
4250        }
4251        let ctx = EvalContext::new(&combined_schema, None);
4252
4253        // Nested-loop join. Starting set: every primary row, padded with
4254        // (no joined columns yet).
4255        let mut working: Vec<Row> = primary_table.rows().iter().cloned().collect();
4256        let mut produced_len = primary_table.schema().columns.len();
4257        for (t, _, kind, on) in &joined_tables {
4258            let right_arity = t.schema().columns.len();
4259            let mut next: Vec<Row> = Vec::new();
4260            for left in &working {
4261                let mut left_matched = false;
4262                for right in t.rows() {
4263                    let mut combined_vals = left.values.clone();
4264                    combined_vals.extend(right.values.iter().cloned());
4265                    // Pad combined to the eventual full width so the
4266                    // partial schema still matches positions used by ON.
4267                    let combined = Row::new(combined_vals);
4268                    let keep = if let Some(on_expr) = on {
4269                        let cond = eval::eval_expr(on_expr, &combined, &ctx)?;
4270                        matches!(cond, Value::Bool(true))
4271                    } else {
4272                        // CROSS / comma-list: every pair survives.
4273                        true
4274                    };
4275                    if keep {
4276                        next.push(combined);
4277                        left_matched = true;
4278                    }
4279                }
4280                if !left_matched && matches!(kind, JoinKind::Left) {
4281                    // LEFT OUTER JOIN: emit the left row with NULLs on
4282                    // the right side when no peer matched.
4283                    let mut combined_vals = left.values.clone();
4284                    for _ in 0..right_arity {
4285                        combined_vals.push(Value::Null);
4286                    }
4287                    next.push(Row::new(combined_vals));
4288                }
4289            }
4290            working = next;
4291            produced_len += right_arity;
4292            debug_assert!(produced_len <= combined_schema.len());
4293        }
4294
4295        // WHERE filter against combined rows.
4296        let mut filtered: Vec<Row> = Vec::new();
4297        for row in working {
4298            if let Some(where_expr) = &stmt.where_ {
4299                let cond = eval::eval_expr(where_expr, &row, &ctx)?;
4300                if !matches!(cond, Value::Bool(true)) {
4301                    continue;
4302                }
4303            }
4304            filtered.push(row);
4305        }
4306
4307        // Aggregate path: handle GROUP BY / aggregate calls over the
4308        // joined+filtered rows.
4309        if aggregate::uses_aggregate(stmt) {
4310            let refs: Vec<&Row> = filtered.iter().collect();
4311            let mut agg = aggregate::run(stmt, &refs, &combined_schema, None)?;
4312            apply_offset_and_limit(&mut agg.rows, stmt.offset_literal(), stmt.limit_literal());
4313            return Ok(QueryResult::Rows {
4314                columns: agg.columns,
4315                rows: agg.rows,
4316            });
4317        }
4318
4319        let projection = build_projection(&stmt.items, &combined_schema, "")?;
4320        let mut tagged: Vec<(Vec<f64>, Row)> = Vec::new();
4321        for row in &filtered {
4322            let mut values = Vec::with_capacity(projection.len());
4323            for p in &projection {
4324                values.push(eval::eval_expr(&p.expr, row, &ctx)?);
4325            }
4326            let order_keys = if stmt.order_by.is_empty() {
4327                Vec::new()
4328            } else {
4329                build_order_keys(&stmt.order_by, row, &ctx)?
4330            };
4331            tagged.push((order_keys, Row::new(values)));
4332        }
4333        if !stmt.order_by.is_empty() {
4334            let keep = if stmt.distinct {
4335                None
4336            } else {
4337                stmt.limit_literal()
4338                    .map(|l| l as usize + stmt.offset_literal().map_or(0, |o| o as usize))
4339            };
4340            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
4341            partial_sort_tagged(&mut tagged, keep, &descs);
4342        }
4343        let mut output_rows: Vec<Row> = tagged.into_iter().map(|(_, r)| r).collect();
4344        if stmt.distinct {
4345            output_rows = dedup_rows(output_rows);
4346        }
4347        apply_offset_and_limit(
4348            &mut output_rows,
4349            stmt.offset_literal(),
4350            stmt.limit_literal(),
4351        );
4352        let columns: Vec<ColumnSchema> = projection
4353            .into_iter()
4354            .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
4355            .collect();
4356        Ok(QueryResult::Rows {
4357            columns,
4358            rows: output_rows,
4359        })
4360    }
4361}
4362
/// One row-producing projection: an expression to evaluate, the resulting
/// column's user-visible name, its inferred type, and nullability.
///
/// Built by `build_projection`; the executors evaluate `expr` once per
/// source row and turn the remaining fields into the result set's
/// `ColumnSchema`.
#[derive(Debug, Clone)]
struct ProjectedItem {
    // Expression evaluated against each source row to produce the cell.
    expr: Expr,
    // Column name reported to the client (alias, column name, or the
    // expression's text form — see `build_projection`).
    output_name: String,
    // Reported column type; `build_projection` uses the schema type for
    // plain column refs and `DataType::Text` for everything else.
    ty: DataType,
    // Whether the reported column may contain NULL.
    nullable: bool,
}
4372
4373/// Dedupe a row set, preserving first-seen order. `Row`'s `PartialEq` is
4374/// structural (`Vec<Value>` ⇒ pairwise `Value` equality), which gives SQL
4375/// `NULL = NULL → TRUE` and `NaN = NaN → FALSE`. The first agrees with
4376/// the spec's "two NULLs are not distinct"; the second is a tolerated
4377/// quirk for v1 (no NaN literals are reachable from the SQL surface).
4378fn dedup_rows(rows: Vec<Row>) -> Vec<Row> {
4379    let mut out: Vec<Row> = Vec::with_capacity(rows.len());
4380    for r in rows {
4381        if !out.iter().any(|seen| seen == &r) {
4382            out.push(r);
4383        }
4384    }
4385    out
4386}
4387
4388/// Coerce a `Value` to an `f64` sort key for ORDER BY. Numbers map directly;
4389/// NULL sorts last (treated as `+∞`); booleans are 0.0 / 1.0; text uses lex
4390/// order via the byte values; vectors are not sortable.
4391fn value_to_order_key(v: &Value) -> Result<f64, EngineError> {
4392    match v {
4393        Value::Null => Ok(f64::INFINITY),
4394        Value::SmallInt(n) => Ok(f64::from(*n)),
4395        Value::Int(n) => Ok(f64::from(*n)),
4396        Value::Date(d) => Ok(f64::from(*d)),
4397        #[allow(clippy::cast_precision_loss)]
4398        Value::Timestamp(t) => Ok(*t as f64),
4399        #[allow(clippy::cast_precision_loss)]
4400        Value::Numeric { scaled, scale } => {
4401            // Scaled integer / 10^scale, computed via f64 for sort
4402            // ordering only. Precision losses here only matter for
4403            // ORDER BY tie-breaks well past 15 significant digits.
4404            // `f64::powi` lives in std; we hand-roll the loop so the
4405            // no_std engine crate doesn't need it.
4406            let mut divisor = 1.0_f64;
4407            for _ in 0..*scale {
4408                divisor *= 10.0;
4409            }
4410            Ok((*scaled as f64) / divisor)
4411        }
4412        #[allow(clippy::cast_precision_loss)]
4413        Value::BigInt(n) => Ok(*n as f64),
4414        Value::Float(x) => Ok(*x),
4415        Value::Bool(b) => Ok(if *b { 1.0 } else { 0.0 }),
4416        Value::Text(s) => {
4417            // Lex order by codepoints — good enough for ORDER BY name.
4418            // Map first 8 bytes packed into u64 as a coarse key; ties fall to
4419            // partial_cmp Equal. v1.x can swap in a real string comparator.
4420            let mut key: u64 = 0;
4421            for &b in s.as_bytes().iter().take(8) {
4422                key = (key << 8) | u64::from(b);
4423            }
4424            #[allow(clippy::cast_precision_loss)]
4425            Ok(key as f64)
4426        }
4427        Value::Vector(_) | Value::Sq8Vector(_) | Value::HalfVector(_) => {
4428            Err(EngineError::Unsupported(
4429                "ORDER BY of a raw vector column is not meaningful — use `<->`".into(),
4430            ))
4431        }
4432        Value::Interval { .. } => Err(EngineError::Unsupported(
4433            "ORDER BY of an INTERVAL is not supported in v2.11 \
4434             (months vs micros has no single canonical ordering)"
4435                .into(),
4436        )),
4437        Value::Json(_) => Err(EngineError::Unsupported(
4438            "ORDER BY of a JSON value is not supported — cast the document to text first".into(),
4439        )),
4440        // v7.5.0 — Value is #[non_exhaustive]; future variants need
4441        // an explicit ORDER BY mapping. Surface as Unsupported until
4442        // engine support is added.
4443        _ => Err(EngineError::Unsupported(
4444            "ORDER BY of this value type is not supported".into(),
4445        )),
4446    }
4447}
4448
4449/// Try to plan a WHERE clause as an equality lookup against an existing
4450/// index. Returns the candidate row indices on success; `None` means the
4451/// caller should fall back to a full scan.
4452///
4453/// v0.8 recognises a single top-level `col = literal` (in either operand
4454/// order). AND chains and range scans land in later milestones.
4455/// Look for `ORDER BY col <dist-op> literal LIMIT k` against an
4456/// NSW-indexed vector column. Recognised distance ops: `<->` (L2),
4457/// `<#>` (inner product), `<=>` (cosine). When a WHERE clause is
4458/// present, the planner does an "over-fetch and filter" pass — it
4459/// asks the graph for `k * over_fetch` candidates, evaluates WHERE
4460/// against each, and trims back to `k`. Returns the row indices in
4461/// ascending-distance order when the plan applies.
4462fn try_nsw_knn(
4463    stmt: &SelectStatement,
4464    table: &Table,
4465    schema_cols: &[ColumnSchema],
4466    table_alias: &str,
4467) -> Option<Vec<usize>> {
4468    if stmt.distinct {
4469        return None;
4470    }
4471    let limit = usize::try_from(stmt.limit_literal()?).ok()?;
4472    if limit == 0 {
4473        return None;
4474    }
4475    // v6.4.0 — NSW kNN dispatch needs a single ORDER BY key on the
4476    // distance metric. Multi-key ORDER BY falls through to the
4477    // generic sort path.
4478    if stmt.order_by.len() != 1 {
4479        return None;
4480    }
4481    let order = &stmt.order_by[0];
4482    // NSW kNN returns rows ascending by distance — DESC inverts the
4483    // natural order, so the planner can't handle it without a sort
4484    // pass. Fall back to the generic ORDER BY path.
4485    if order.desc {
4486        return None;
4487    }
4488    let Expr::Binary { lhs, op, rhs } = &order.expr else {
4489        return None;
4490    };
4491    let metric = match op {
4492        BinOp::L2Distance => spg_storage::NswMetric::L2,
4493        BinOp::InnerProduct => spg_storage::NswMetric::InnerProduct,
4494        BinOp::CosineDistance => spg_storage::NswMetric::Cosine,
4495        _ => return None,
4496    };
4497    // Accept both `col <op> literal` and `literal <op> col`.
4498    let ((Expr::Column(col), literal) | (literal, Expr::Column(col))) =
4499        (lhs.as_ref(), rhs.as_ref())
4500    else {
4501        return None;
4502    };
4503    if let Some(q) = &col.qualifier
4504        && q != table_alias
4505    {
4506        return None;
4507    }
4508    let col_pos = schema_cols.iter().position(|s| s.name == col.name)?;
4509    let query = literal_to_vector(literal)?;
4510    let idx = spg_storage::nsw_index_on(table, col_pos)?;
4511    if let Some(where_expr) = &stmt.where_ {
4512        // Over-fetch and filter. The factor (10×) is a heuristic that
4513        // covers typical selectivity for the corpus tests; v2.x will
4514        // make it configurable.
4515        let over_fetch = limit.saturating_mul(10).max(NSW_OVER_FETCH_FLOOR);
4516        let candidates = spg_storage::nsw_query(table, &idx.name, &query, over_fetch, metric);
4517        let ctx = EvalContext::new(schema_cols, Some(table_alias));
4518        let mut kept: Vec<usize> = Vec::with_capacity(limit);
4519        for i in candidates {
4520            let row = &table.rows()[i];
4521            let cond = eval::eval_expr(where_expr, row, &ctx).ok()?;
4522            if matches!(cond, Value::Bool(true)) {
4523                kept.push(i);
4524                if kept.len() >= limit {
4525                    break;
4526                }
4527            }
4528        }
4529        Some(kept)
4530    } else {
4531        Some(spg_storage::nsw_query(
4532            table, &idx.name, &query, limit, metric,
4533        ))
4534    }
4535}
4536
/// Lower bound on the over-fetch pool when WHERE is present — even
/// for tiny `LIMIT 1` queries we keep enough candidates to absorb a
/// few WHERE rejections.
///
/// `try_nsw_knn` takes the larger of its 10× over-fetch estimate and
/// this floor when it asks the NSW graph for candidates.
const NSW_OVER_FETCH_FLOOR: usize = 32;
4541
4542/// Pull a `Vec<f32>` out of a literal-or-cast expression. Returns
4543/// `None` for anything we can't fold at plan time.
4544fn literal_to_vector(e: &Expr) -> Option<Vec<f32>> {
4545    match e {
4546        Expr::Literal(Literal::Vector(v)) => Some(v.clone()),
4547        Expr::Cast { expr, .. } => literal_to_vector(expr),
4548        _ => None,
4549    }
4550}
4551
4552/// Materialise rows in a planner-supplied order (used by the NSW path)
4553/// without re-running ORDER BY. The projection + LIMIT slot mirror the
4554/// equivalent block in `exec_bare_select`.
4555fn materialise_in_order(
4556    stmt: &SelectStatement,
4557    table: &Table,
4558    schema_cols: &[ColumnSchema],
4559    table_alias: &str,
4560    ordered_rows: &[usize],
4561) -> Result<QueryResult, EngineError> {
4562    let ctx = EvalContext::new(schema_cols, Some(table_alias));
4563    let projection = build_projection(&stmt.items, schema_cols, table_alias)?;
4564    let mut output_rows: Vec<Row> = Vec::with_capacity(ordered_rows.len());
4565    for &i in ordered_rows {
4566        let row = &table.rows()[i];
4567        let mut values = Vec::with_capacity(projection.len());
4568        for p in &projection {
4569            values.push(eval::eval_expr(&p.expr, row, &ctx)?);
4570        }
4571        output_rows.push(Row::new(values));
4572    }
4573    apply_offset_and_limit(
4574        &mut output_rows,
4575        stmt.offset_literal(),
4576        stmt.limit_literal(),
4577    );
4578    let columns: Vec<ColumnSchema> = projection
4579        .into_iter()
4580        .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
4581        .collect();
4582    Ok(QueryResult::Rows {
4583        columns,
4584        rows: output_rows,
4585    })
4586}
4587
4588fn try_index_seek<'a>(
4589    where_expr: &Expr,
4590    schema_cols: &[ColumnSchema],
4591    catalog: &'a Catalog,
4592    table: &'a Table,
4593    table_alias: &str,
4594) -> Option<Vec<Cow<'a, Row>>> {
4595    // v7.11.3 — recurse through top-level `AND` so a PG-style
4596    // composite predicate like `WHERE id = 1 AND created_at > $1`
4597    // still hits the index on `id`. The caller re-applies the
4598    // full WHERE expression to each returned row, so dropping the
4599    // residual conjuncts here is correct — the index just narrows
4600    // the candidate set.
4601    if let Expr::Binary {
4602        lhs,
4603        op: BinOp::And,
4604        rhs,
4605    } = where_expr
4606    {
4607        // Try LHS first (typical convention: leading equality on
4608        // the indexed column comes first in user-written SQL).
4609        if let Some(rows) = try_index_seek(lhs, schema_cols, catalog, table, table_alias) {
4610            return Some(rows);
4611        }
4612        return try_index_seek(rhs, schema_cols, catalog, table, table_alias);
4613    }
4614    let Expr::Binary {
4615        lhs,
4616        op: BinOp::Eq,
4617        rhs,
4618    } = where_expr
4619    else {
4620        return None;
4621    };
4622    let (col_pos, value) = resolve_col_literal_pair(lhs, rhs, schema_cols, table_alias)
4623        .or_else(|| resolve_col_literal_pair(rhs, lhs, schema_cols, table_alias))?;
4624    let idx = table.index_on(col_pos)?;
4625    let key = IndexKey::from_value(&value)?;
4626    let locators = idx.lookup_eq(&key);
4627    let table_name = table.schema().name.as_str();
4628    // v5.1: each locator dispatches to either the hot tier (zero-
4629    // copy borrow of `table.rows()[i]`) or a cold-tier segment
4630    // (one page read + dense row decode, ~µs scale). Cold rows are
4631    // returned as `Cow::Owned` so the caller's `&Row` iteration
4632    // doesn't see a tier distinction; pre-freezer (no cold
4633    // segments loaded) every locator is `Hot` and every entry is
4634    // `Cow::Borrowed` — identical cost to the pre-v5.1 path.
4635    let mut out: Vec<Cow<'a, Row>> = Vec::with_capacity(locators.len());
4636    for loc in locators {
4637        match *loc {
4638            spg_storage::RowLocator::Hot(i) => {
4639                if let Some(row) = table.rows().get(i) {
4640                    out.push(Cow::Borrowed(row));
4641                }
4642            }
4643            spg_storage::RowLocator::Cold { segment_id, .. } => {
4644                if let Some(row) = catalog.resolve_cold_locator(table_name, segment_id, &key) {
4645                    out.push(Cow::Owned(row));
4646                }
4647            }
4648        }
4649    }
4650    Some(out)
4651}
4652
4653/// v5.2.3: extract `(column_position, IndexKey)` when `where_expr`
4654/// is a simple `col = literal` predicate suitable for a `BTree` index
4655/// seek. Used by `exec_update_cancel` / `exec_delete_cancel` to
4656/// decide whether a write touches a cold-tier row (which requires
4657/// promote-on-write / shadow-on-delete) before falling through to
4658/// the hot-tier row walk.
4659///
4660/// Returns `None` for any predicate shape the planner can't push
4661/// down to an index seek — complex WHERE clauses always take the
4662/// hot-only path (cold rows are immutable to non-indexed writes
4663/// until a future scan-fanout sub-version).
4664fn try_pk_predicate(
4665    where_expr: &Expr,
4666    schema_cols: &[ColumnSchema],
4667    table_alias: &str,
4668) -> Option<(usize, IndexKey)> {
4669    let Expr::Binary {
4670        lhs,
4671        op: BinOp::Eq,
4672        rhs,
4673    } = where_expr
4674    else {
4675        return None;
4676    };
4677    let (col_pos, value) = resolve_col_literal_pair(lhs, rhs, schema_cols, table_alias)
4678        .or_else(|| resolve_col_literal_pair(rhs, lhs, schema_cols, table_alias))?;
4679    let key = IndexKey::from_value(&value)?;
4680    Some((col_pos, key))
4681}
4682
4683fn resolve_col_literal_pair(
4684    col_side: &Expr,
4685    lit_side: &Expr,
4686    schema_cols: &[ColumnSchema],
4687    table_alias: &str,
4688) -> Option<(usize, Value)> {
4689    let Expr::Column(c) = col_side else {
4690        return None;
4691    };
4692    if let Some(q) = &c.qualifier
4693        && q != table_alias
4694    {
4695        return None;
4696    }
4697    let pos = schema_cols.iter().position(|s| s.name == c.name)?;
4698    let Expr::Literal(l) = lit_side else {
4699        return None;
4700    };
4701    let v = match l {
4702        Literal::Integer(n) => {
4703            if let Ok(small) = i32::try_from(*n) {
4704                Value::Int(small)
4705            } else {
4706                Value::BigInt(*n)
4707            }
4708        }
4709        Literal::Float(x) => Value::Float(*x),
4710        Literal::String(s) => Value::Text(s.clone()),
4711        Literal::Bool(b) => Value::Bool(*b),
4712        Literal::Null => Value::Null,
4713        // Vector and Interval literals can't be used as B-tree index keys.
4714        // Tell the planner to fall back to full-scan.
4715        Literal::Vector(_) | Literal::Interval { .. } => return None,
4716    };
4717    Some((pos, v))
4718}
4719
4720/// Find the schema entry that a SELECT-list `Expr::Column` refers to.
4721/// Mirrors `resolve_column` in `eval.rs`, but returns a proper
4722/// `EngineError` so the projection-build path keeps `UnknownQualifier`
4723/// vs `ColumnNotFound` distinct.
4724fn resolve_projection_column<'a>(
4725    c: &ColumnName,
4726    schema_cols: &'a [ColumnSchema],
4727    table_alias: &str,
4728) -> Result<&'a ColumnSchema, EngineError> {
4729    if let Some(q) = &c.qualifier {
4730        let composite = alloc::format!("{q}.{name}", name = c.name);
4731        if let Some(s) = schema_cols.iter().find(|s| s.name == composite) {
4732            return Ok(s);
4733        }
4734        // Single-table case: the qualifier may equal the active alias —
4735        // then look for the bare column name.
4736        if q == table_alias
4737            && let Some(s) = schema_cols.iter().find(|s| s.name == c.name)
4738        {
4739            return Ok(s);
4740        }
4741        // For multi-table schemas the qualifier is unknown only if no
4742        // column bears the "<q>." prefix. For single-table, the alias
4743        // mismatch alone is enough.
4744        let prefix = alloc::format!("{q}.");
4745        let qualifier_known =
4746            q == table_alias || schema_cols.iter().any(|s| s.name.starts_with(&prefix));
4747        if !qualifier_known {
4748            return Err(EngineError::Eval(EvalError::UnknownQualifier {
4749                qualifier: q.clone(),
4750            }));
4751        }
4752        return Err(EngineError::Eval(EvalError::ColumnNotFound {
4753            name: c.name.clone(),
4754        }));
4755    }
4756    if let Some(s) = schema_cols.iter().find(|s| s.name == c.name) {
4757        return Ok(s);
4758    }
4759    let suffix = alloc::format!(".{name}", name = c.name);
4760    let mut matches = schema_cols.iter().filter(|s| s.name.ends_with(&suffix));
4761    let first = matches.next();
4762    let extra = matches.next();
4763    match (first, extra) {
4764        (Some(s), None) => Ok(s),
4765        (Some(_), Some(_)) => Err(EngineError::Eval(EvalError::TypeMismatch {
4766            detail: alloc::format!("ambiguous column reference: {}", c.name),
4767        })),
4768        _ => Err(EngineError::Eval(EvalError::ColumnNotFound {
4769            name: c.name.clone(),
4770        })),
4771    }
4772}
4773
4774fn build_projection(
4775    items: &[SelectItem],
4776    schema_cols: &[ColumnSchema],
4777    table_alias: &str,
4778) -> Result<Vec<ProjectedItem>, EngineError> {
4779    let mut out = Vec::new();
4780    for item in items {
4781        match item {
4782            SelectItem::Wildcard => {
4783                for col in schema_cols {
4784                    out.push(ProjectedItem {
4785                        expr: Expr::Column(ColumnName {
4786                            qualifier: None,
4787                            name: col.name.clone(),
4788                        }),
4789                        output_name: col.name.clone(),
4790                        ty: col.ty,
4791                        nullable: col.nullable,
4792                    });
4793                }
4794            }
4795            SelectItem::Expr { expr, alias } => {
4796                // Plain column ref keeps full schema info (real type +
4797                // nullability). Compound expressions evaluate fine but have
4798                // no static type — surface them as nullable TEXT, which is
4799                // what most clients render anyway.
4800                if let Expr::Column(c) = expr {
4801                    let sch = resolve_projection_column(c, schema_cols, table_alias)?;
4802                    let output_name = alias.clone().unwrap_or_else(|| c.name.clone());
4803                    out.push(ProjectedItem {
4804                        expr: expr.clone(),
4805                        output_name,
4806                        ty: sch.ty,
4807                        nullable: sch.nullable,
4808                    });
4809                } else {
4810                    let output_name = alias.clone().unwrap_or_else(|| expr.to_string());
4811                    out.push(ProjectedItem {
4812                        expr: expr.clone(),
4813                        output_name,
4814                        ty: DataType::Text,
4815                        nullable: true,
4816                    });
4817                }
4818            }
4819        }
4820    }
4821    Ok(out)
4822}
4823
4824/// Promote an integer to a NUMERIC value at the requested scale.
4825/// Rejects values that, after scaling, would overflow the column's
4826/// precision budget.
4827fn numeric_from_integer(
4828    n: i128,
4829    precision: u8,
4830    scale: u8,
4831    col_name: &str,
4832) -> Result<Value, EngineError> {
4833    let factor = pow10_i128(scale);
4834    let scaled = n.checked_mul(factor).ok_or_else(|| {
4835        EngineError::Unsupported(alloc::format!(
4836            "integer overflow scaling value for column `{col_name}` to scale {scale}"
4837        ))
4838    })?;
4839    check_precision(scaled, precision, col_name)?;
4840    Ok(Value::Numeric { scaled, scale })
4841}
4842
4843/// Float → NUMERIC. Uses round-half-away-from-zero on `x * 10^scale`,
4844/// then verifies the result fits the column's precision.
4845#[allow(clippy::cast_precision_loss, clippy::cast_possible_truncation)]
4846fn numeric_from_float(
4847    x: f64,
4848    precision: u8,
4849    scale: u8,
4850    col_name: &str,
4851) -> Result<Value, EngineError> {
4852    if !x.is_finite() {
4853        return Err(EngineError::Unsupported(alloc::format!(
4854            "cannot store non-finite float in NUMERIC column `{col_name}`"
4855        )));
4856    }
4857    let mut factor = 1.0_f64;
4858    for _ in 0..scale {
4859        factor *= 10.0;
4860    }
4861    // Round half-away-from-zero by biasing then casting (`as i128`
4862    // truncates toward zero, so the bias + truncation gives the
4863    // desired rounding). `f64::floor` / `ceil` live in std; we don't
4864    // need them — the cast handles the truncation step.
4865    let shifted = x * factor;
4866    let biased = if shifted >= 0.0 {
4867        shifted + 0.5
4868    } else {
4869        shifted - 0.5
4870    };
4871    // Range-check before casting back to i128 — the cast itself is
4872    // saturating in Rust, which would silently truncate huge inputs.
4873    if !(-1e38..=1e38).contains(&biased) {
4874        return Err(EngineError::Unsupported(alloc::format!(
4875            "value {x} overflows NUMERIC range for column `{col_name}`"
4876        )));
4877    }
4878    let scaled = biased as i128;
4879    check_precision(scaled, precision, col_name)?;
4880    Ok(Value::Numeric { scaled, scale })
4881}
4882
4883/// Move a Numeric value from `src_scale` to `dst_scale`. Going up
4884/// multiplies by 10; going down rounds half-away-from-zero.
4885fn numeric_rescale(
4886    scaled: i128,
4887    src_scale: u8,
4888    precision: u8,
4889    dst_scale: u8,
4890    col_name: &str,
4891) -> Result<Value, EngineError> {
4892    let new_scaled = if dst_scale >= src_scale {
4893        let bump = pow10_i128(dst_scale - src_scale);
4894        scaled.checked_mul(bump).ok_or_else(|| {
4895            EngineError::Unsupported(alloc::format!(
4896                "overflow rescaling NUMERIC for column `{col_name}`"
4897            ))
4898        })?
4899    } else {
4900        let drop = pow10_i128(src_scale - dst_scale);
4901        let half = drop / 2;
4902        if scaled >= 0 {
4903            (scaled + half) / drop
4904        } else {
4905            (scaled - half) / drop
4906        }
4907    };
4908    check_precision(new_scaled, precision, col_name)?;
4909    Ok(Value::Numeric {
4910        scaled: new_scaled,
4911        scale: dst_scale,
4912    })
4913}
4914
/// Drop the fractional part of a scaled integer, returning the integer
/// portion (toward zero). Used for NUMERIC → INT casts.
///
/// `scaled` is interpreted as `value * 10^scale`. When `10^scale` does
/// not fit in an `i128` (scale above 38) the divisor is larger than any
/// representable magnitude, so the integer part is 0.
const fn numeric_truncate_to_integer(scaled: i128, scale: u8) -> i128 {
    // `as` is a lossless u8 → u32 widening; `u32::from` cannot be
    // called from a `const fn`.
    match 10_i128.checked_pow(scale as u32) {
        Some(factor) => scaled / factor,
        None => 0,
    }
}
4924
4925/// Verify a scaled NUMERIC value fits the column's declared precision.
4926/// `precision == 0` is the "unconstrained" form (bare `NUMERIC`); we
4927/// skip the check there.
4928fn check_precision(scaled: i128, precision: u8, col_name: &str) -> Result<(), EngineError> {
4929    if precision == 0 {
4930        return Ok(());
4931    }
4932    let limit = pow10_i128(precision);
4933    if scaled.unsigned_abs() >= limit.unsigned_abs() {
4934        return Err(EngineError::Unsupported(alloc::format!(
4935            "NUMERIC value exceeds precision {precision} for column `{col_name}`"
4936        )));
4937    }
4938    Ok(())
4939}
4940
/// Compute `10^p` as an `i128`; callable from `const` contexts.
///
/// Only exponents up to 38 are representable — a larger `p` overflows
/// the multiplication.
const fn pow10_i128_const(p: u8) -> i128 {
    let mut result: i128 = 1;
    let mut remaining = p;
    while remaining > 0 {
        result *= 10;
        remaining -= 1;
    }
    result
}

/// Non-`const` entry point for `10^p`; forwards to
/// `pow10_i128_const`.
fn pow10_i128(p: u8) -> i128 {
    pow10_i128_const(p)
}
4954
/// Walk a parsed `Statement`, swapping any `NOW()` /
/// `CURRENT_TIMESTAMP()` / `CURRENT_DATE()` function calls for a
/// literal cast that wraps the engine's per-statement clock reading.
/// When `now_micros` is `None`, calls stay as-is and surface as
/// `unknown function` at eval time, which keeps the error path explicit.
///
/// v4.10: pre-walk the WHERE / projection / etc. of a SELECT and
/// replace every subquery node with a materialised literal. SPG
/// only supports uncorrelated subqueries — the inner SELECT does
/// not see outer-row columns, so the result is the same for every
/// outer row and can be evaluated once.
///
/// Returns the rewritten statement; the caller passes this to the
/// regular row-loop executor which no longer sees Subquery nodes
/// in its tree.
// NOTE(review): the two paragraphs above read like documentation for
// individual statement-rewrite helpers (clock substitution, subquery
// materialisation) rather than for the `impl` block they are attached
// to — confirm whether they should move next to those helpers.
4969impl Engine {
4970    /// v4.12 window executor. Implements `ROW_NUMBER` / `RANK` /
4971    /// `DENSE_RANK` and the partition-aware aggregates `SUM` /
4972    /// `AVG` / `COUNT` / `MIN` / `MAX`. The plan is:
4973    /// 1. Apply the WHERE filter.
4974    /// 2. For each unique `WindowFunction` node in the projection,
4975    ///    partition + sort, compute the per-row value.
4976    /// 3. Append the window values as synthetic columns (`__win_N`)
4977    ///    to the row schema.
4978    /// 4. Rewrite the projection to read those columns.
4979    /// 5. Hand off to the regular project / ORDER BY / LIMIT pipe.
4980    #[allow(
4981        clippy::too_many_lines,
4982        clippy::type_complexity,
4983        clippy::needless_range_loop
4984    )] // window-eval is one cohesive pipe; splitting fragments
4985    fn exec_select_with_window(
4986        &self,
4987        stmt: &SelectStatement,
4988        cancel: CancelToken<'_>,
4989    ) -> Result<QueryResult, EngineError> {
4990        let from = stmt.from.as_ref().ok_or_else(|| {
4991            EngineError::Unsupported("window functions require a FROM clause".into())
4992        })?;
4993        // For v4.12 we only support a single-table FROM. Joins +
4994        // windows is queued for v5.x.
4995        if !from.joins.is_empty() {
4996            return Err(EngineError::Unsupported(
4997                "JOIN with window functions not yet supported".into(),
4998            ));
4999        }
5000        let primary = &from.primary;
5001        let table = self.active_catalog().get(&primary.name).ok_or_else(|| {
5002            StorageError::TableNotFound {
5003                name: primary.name.clone(),
5004            }
5005        })?;
5006        let alias = primary.alias.as_deref().unwrap_or(primary.name.as_str());
5007        let schema_cols = &table.schema().columns;
5008        let ctx = EvalContext::new(schema_cols, Some(alias));
5009
5010        // 1) Filter pass.
5011        let mut filtered: Vec<&Row> = Vec::new();
5012        for (i, row) in table.rows().iter().enumerate() {
5013            if i.is_multiple_of(256) {
5014                cancel.check()?;
5015            }
5016            if let Some(w) = &stmt.where_ {
5017                let cond = eval::eval_expr(w, row, &ctx)?;
5018                if !matches!(cond, Value::Bool(true)) {
5019                    continue;
5020                }
5021            }
5022            filtered.push(row);
5023        }
5024        let n_rows = filtered.len();
5025
5026        // 2) Collect unique window function nodes from projection.
5027        let mut window_nodes: Vec<Expr> = Vec::new();
5028        for item in &stmt.items {
5029            if let SelectItem::Expr { expr, .. } = item {
5030                collect_window_nodes(expr, &mut window_nodes);
5031            }
5032        }
5033
5034        // 3) For each window, compute per-row value.
5035        // Index: same order as window_nodes; for row i, win_vals[w][i].
5036        let mut win_vals: Vec<Vec<Value>> = Vec::with_capacity(window_nodes.len());
5037        for wnode in &window_nodes {
5038            let Expr::WindowFunction {
5039                name,
5040                args,
5041                partition_by,
5042                order_by,
5043                frame,
5044                null_treatment,
5045            } = wnode
5046            else {
5047                unreachable!("collect_window_nodes pushes only WindowFunction");
5048            };
5049            // Compute (partition_key, order_key, original_index) for each row.
5050            let mut indexed: Vec<(Vec<Value>, Vec<(Value, bool)>, usize)> =
5051                Vec::with_capacity(n_rows);
5052            for (i, row) in filtered.iter().enumerate() {
5053                let pkey: Vec<Value> = partition_by
5054                    .iter()
5055                    .map(|p| eval::eval_expr(p, row, &ctx))
5056                    .collect::<Result<_, _>>()?;
5057                let okey: Vec<(Value, bool)> = order_by
5058                    .iter()
5059                    .map(|(e, desc)| eval::eval_expr(e, row, &ctx).map(|v| (v, *desc)))
5060                    .collect::<Result<_, _>>()?;
5061                indexed.push((pkey, okey, i));
5062            }
5063            // Sort by (partition_key, order_key). Partition key uses
5064            // a stable encoded form; order key respects ASC/DESC.
5065            indexed.sort_by(|a, b| {
5066                let p_cmp = partition_key_cmp(&a.0, &b.0);
5067                if p_cmp != core::cmp::Ordering::Equal {
5068                    return p_cmp;
5069                }
5070                order_key_cmp(&a.1, &b.1)
5071            });
5072            // Per-partition compute.
5073            let mut out_vals: Vec<Value> = alloc::vec![Value::Null; n_rows];
5074            let mut p_start = 0;
5075            while p_start < indexed.len() {
5076                let mut p_end = p_start + 1;
5077                while p_end < indexed.len()
5078                    && partition_key_cmp(&indexed[p_start].0, &indexed[p_end].0)
5079                        == core::cmp::Ordering::Equal
5080                {
5081                    p_end += 1;
5082                }
5083                // Compute the function within this partition slice.
5084                compute_window_partition(
5085                    name,
5086                    args,
5087                    !order_by.is_empty(),
5088                    frame.as_ref(),
5089                    *null_treatment,
5090                    &indexed[p_start..p_end],
5091                    &filtered,
5092                    &ctx,
5093                    &mut out_vals,
5094                )?;
5095                p_start = p_end;
5096            }
5097            win_vals.push(out_vals);
5098        }
5099
5100        // 4) Build extended schema: original columns + synthetic.
5101        let mut ext_cols = schema_cols.clone();
5102        for i in 0..window_nodes.len() {
5103            ext_cols.push(ColumnSchema::new(
5104                alloc::format!("__win_{i}"),
5105                DataType::Text, // type doesn't matter for projection eval
5106                true,
5107            ));
5108        }
5109        // 5) Build extended rows: each row gets its window values appended.
5110        let mut ext_rows: Vec<Row> = Vec::with_capacity(n_rows);
5111        for i in 0..n_rows {
5112            let mut values = filtered[i].values.clone();
5113            for w in 0..window_nodes.len() {
5114                values.push(win_vals[w][i].clone());
5115            }
5116            ext_rows.push(Row::new(values));
5117        }
5118        // 6) Rewrite the projection: WindowFunction nodes → Column(__win_N).
5119        let mut rewritten_items: Vec<SelectItem> = Vec::with_capacity(stmt.items.len());
5120        for item in &stmt.items {
5121            let new_item = match item {
5122                SelectItem::Wildcard => SelectItem::Wildcard,
5123                SelectItem::Expr { expr, alias } => {
5124                    let mut e = expr.clone();
5125                    rewrite_window_to_columns(&mut e, &window_nodes);
5126                    SelectItem::Expr {
5127                        expr: e,
5128                        alias: alias.clone(),
5129                    }
5130                }
5131            };
5132            rewritten_items.push(new_item);
5133        }
5134
5135        // 7) Project into final rows.
5136        let ext_ctx = EvalContext::new(&ext_cols, Some(alias));
5137        let projection = build_projection(&rewritten_items, &ext_cols, alias)?;
5138        let mut tagged: Vec<(Vec<f64>, Row)> = Vec::with_capacity(n_rows);
5139        for (i, row) in ext_rows.iter().enumerate() {
5140            if i.is_multiple_of(256) {
5141                cancel.check()?;
5142            }
5143            let mut values = Vec::with_capacity(projection.len());
5144            for p in &projection {
5145                values.push(eval::eval_expr(&p.expr, row, &ext_ctx)?);
5146            }
5147            let order_keys = if stmt.order_by.is_empty() {
5148                Vec::new()
5149            } else {
5150                let mut keys = Vec::with_capacity(stmt.order_by.len());
5151                for o in &stmt.order_by {
5152                    let mut e = o.expr.clone();
5153                    rewrite_window_to_columns(&mut e, &window_nodes);
5154                    let key = eval::eval_expr(&e, row, &ext_ctx)?;
5155                    keys.push(value_to_order_key(&key)?);
5156                }
5157                keys
5158            };
5159            tagged.push((order_keys, Row::new(values)));
5160        }
5161        // ORDER BY + LIMIT/OFFSET on the projected rows.
5162        if !stmt.order_by.is_empty() {
5163            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
5164            sort_by_keys(&mut tagged, &descs);
5165        }
5166        let mut out_rows: Vec<Row> = tagged.into_iter().map(|(_, r)| r).collect();
5167        apply_offset_and_limit(&mut out_rows, stmt.offset_literal(), stmt.limit_literal());
5168        let final_cols: Vec<ColumnSchema> = projection
5169            .into_iter()
5170            .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
5171            .collect();
5172        Ok(QueryResult::Rows {
5173            columns: final_cols,
5174            rows: out_rows,
5175        })
5176    }
5177
5178    /// v4.11: materialise each CTE into a temp table inside a
5179    /// cloned catalog, then run the body SELECT against a fresh
5180    /// engine instance that owns the enriched catalog. The clone
5181    /// is moderately expensive — only paid by CTE-bearing queries.
5182    /// Subqueries inside CTE bodies / the main body resolve as
5183    /// usual; `clock_fn` is propagated so `NOW()` lines up.
5184    fn exec_with_ctes(
5185        &self,
5186        stmt: &SelectStatement,
5187        cancel: CancelToken<'_>,
5188    ) -> Result<QueryResult, EngineError> {
5189        cancel.check()?;
5190        let mut catalog = self.active_catalog().clone();
5191        for cte in &stmt.ctes {
5192            if catalog.get(&cte.name).is_some() {
5193                return Err(EngineError::Unsupported(alloc::format!(
5194                    "CTE name {:?} shadows an existing table; rename the CTE",
5195                    cte.name
5196                )));
5197            }
5198            let (columns, rows) = if cte.recursive {
5199                self.materialise_recursive_cte(cte, &catalog, cancel)?
5200            } else {
5201                let body_result = self.exec_select_cancel(&cte.body, cancel)?;
5202                let QueryResult::Rows { columns, rows } = body_result else {
5203                    return Err(EngineError::Unsupported(alloc::format!(
5204                        "CTE {:?} body did not return rows",
5205                        cte.name
5206                    )));
5207                };
5208                (columns, rows)
5209            };
5210            // v4.22: the projection builder labels any non-column
5211            // expression as Text — including literal SELECT 1.
5212            // Promote each column's type to whatever the rows
5213            // actually carry so the CTE storage table accepts them.
5214            let inferred = infer_column_types(&columns, &rows);
5215            let mut columns = inferred;
5216            // v4.22: apply optional `WITH name(a, b, c)` overrides.
5217            if !cte.column_overrides.is_empty() {
5218                if cte.column_overrides.len() != columns.len() {
5219                    return Err(EngineError::Unsupported(alloc::format!(
5220                        "CTE {:?} column list has {} names but body returns {} columns",
5221                        cte.name,
5222                        cte.column_overrides.len(),
5223                        columns.len()
5224                    )));
5225                }
5226                for (col, name) in columns.iter_mut().zip(cte.column_overrides.iter()) {
5227                    col.name.clone_from(name);
5228                }
5229            }
5230            let schema = TableSchema::new(cte.name.clone(), columns);
5231            catalog.create_table(schema).map_err(EngineError::Storage)?;
5232            let table = catalog
5233                .get_mut(&cte.name)
5234                .expect("just-created CTE table must exist");
5235            for row in rows {
5236                table.insert(row).map_err(EngineError::Storage)?;
5237            }
5238        }
5239        // Strip CTEs from the body before running on the temp engine
5240        // so we don't recurse forever.
5241        let mut body = stmt.clone();
5242        body.ctes = Vec::new();
5243        let mut temp = Engine::restore(catalog);
5244        if let Some(c) = self.clock {
5245            temp = temp.with_clock(c);
5246        }
5247        if let Some(f) = self.salt_fn {
5248            temp = temp.with_salt_fn(f);
5249        }
5250        temp.exec_select_cancel(&body, cancel)
5251    }
5252
5253    /// v4.22: materialise a WITH RECURSIVE CTE. The body must be a
5254    /// UNION (or UNION ALL) of an anchor that does not reference
5255    /// the CTE name, and one or more recursive terms that do. The
5256    /// anchor runs first; each subsequent iteration runs the
5257    /// recursive term against a temp catalog where the CTE name is
5258    /// bound to the *previous* iteration's output. Iteration stops
5259    /// when the recursive term yields no rows; UNION (DISTINCT)
5260    /// deduplicates against the accumulated result, UNION ALL does
5261    /// not. A hard cap on total rows prevents runaway queries.
5262    #[allow(clippy::too_many_lines)]
5263    fn materialise_recursive_cte(
5264        &self,
5265        cte: &spg_sql::ast::Cte,
5266        base_catalog: &Catalog,
5267        cancel: CancelToken<'_>,
5268    ) -> Result<(Vec<ColumnSchema>, Vec<Row>), EngineError> {
5269        const MAX_TOTAL_ROWS: usize = 1_000_000;
5270        const MAX_ITERATIONS: usize = 100_000;
5271        cancel.check()?;
5272        if cte.body.unions.is_empty() {
5273            return Err(EngineError::Unsupported(alloc::format!(
5274                "WITH RECURSIVE {:?} body must be a UNION of an anchor and a recursive term",
5275                cte.name
5276            )));
5277        }
5278        // Anchor: the body's leading SELECT, with unions stripped.
5279        let mut anchor = cte.body.clone();
5280        let union_terms = core::mem::take(&mut anchor.unions);
5281        anchor.ctes = Vec::new();
5282        // Anchor must not reference the CTE name.
5283        if select_refers_to(&anchor, &cte.name) {
5284            return Err(EngineError::Unsupported(alloc::format!(
5285                "WITH RECURSIVE {:?}: the anchor must not reference the CTE itself",
5286                cte.name
5287            )));
5288        }
5289        let anchor_result = self.exec_select_cancel(&anchor, cancel)?;
5290        let QueryResult::Rows {
5291            columns: anchor_cols,
5292            rows: anchor_rows,
5293        } = anchor_result
5294        else {
5295            return Err(EngineError::Unsupported(alloc::format!(
5296                "WITH RECURSIVE {:?}: anchor did not return rows",
5297                cte.name
5298            )));
5299        };
5300        // The projection builder labels non-column expressions Text;
5301        // refine column types from the anchor's actual values so the
5302        // intermediate iter-catalog tables accept them.
5303        let mut columns = infer_column_types(&anchor_cols, &anchor_rows);
5304        if !cte.column_overrides.is_empty() {
5305            if cte.column_overrides.len() != columns.len() {
5306                return Err(EngineError::Unsupported(alloc::format!(
5307                    "CTE {:?} column list has {} names but anchor returns {} columns",
5308                    cte.name,
5309                    cte.column_overrides.len(),
5310                    columns.len()
5311                )));
5312            }
5313            for (col, name) in columns.iter_mut().zip(cte.column_overrides.iter()) {
5314                col.name.clone_from(name);
5315            }
5316        }
5317        let mut all_rows: Vec<Row> = anchor_rows.clone();
5318        let mut working_set: Vec<Row> = anchor_rows;
5319        let mut seen: alloc::collections::BTreeSet<Vec<u8>> = alloc::collections::BTreeSet::new();
5320        // Track at least one "all UNION ALL" flag — if every union
5321        // kind is ALL we skip the dedup step (faster + matches PG).
5322        let all_union_all = union_terms.iter().all(|(k, _)| matches!(k, UnionKind::All));
5323        if !all_union_all {
5324            for r in &all_rows {
5325                seen.insert(encode_row_key(r));
5326            }
5327        }
5328        for iter in 0..MAX_ITERATIONS {
5329            cancel.check()?;
5330            if working_set.is_empty() {
5331                break;
5332            }
5333            // Build a fresh catalog: base + CTE bound to working_set.
5334            let mut iter_catalog = base_catalog.clone();
5335            let schema = TableSchema::new(cte.name.clone(), columns.clone());
5336            iter_catalog
5337                .create_table(schema)
5338                .map_err(EngineError::Storage)?;
5339            {
5340                let table = iter_catalog.get_mut(&cte.name).expect("just-created");
5341                for row in &working_set {
5342                    table.insert(row.clone()).map_err(EngineError::Storage)?;
5343                }
5344            }
5345            let mut iter_engine = Engine::restore(iter_catalog);
5346            if let Some(c) = self.clock {
5347                iter_engine = iter_engine.with_clock(c);
5348            }
5349            if let Some(f) = self.salt_fn {
5350                iter_engine = iter_engine.with_salt_fn(f);
5351            }
5352            // Run each recursive term in sequence and collect new rows.
5353            let mut next_set: Vec<Row> = Vec::new();
5354            for (_, term) in &union_terms {
5355                let mut term = term.clone();
5356                term.ctes = Vec::new();
5357                let r = iter_engine.exec_select_cancel(&term, cancel)?;
5358                let QueryResult::Rows {
5359                    columns: rc,
5360                    rows: rs,
5361                } = r
5362                else {
5363                    return Err(EngineError::Unsupported(alloc::format!(
5364                        "WITH RECURSIVE {:?}: recursive term did not return rows",
5365                        cte.name
5366                    )));
5367                };
5368                if rc.len() != columns.len() {
5369                    return Err(EngineError::Unsupported(alloc::format!(
5370                        "WITH RECURSIVE {:?}: column count of recursive term ({}) does not match anchor ({})",
5371                        cte.name,
5372                        rc.len(),
5373                        columns.len()
5374                    )));
5375                }
5376                for row in rs {
5377                    if !all_union_all {
5378                        let key = encode_row_key(&row);
5379                        if !seen.insert(key) {
5380                            continue;
5381                        }
5382                    }
5383                    next_set.push(row);
5384                }
5385            }
5386            if next_set.is_empty() {
5387                break;
5388            }
5389            all_rows.extend(next_set.iter().cloned());
5390            working_set = next_set;
5391            if all_rows.len() > MAX_TOTAL_ROWS {
5392                return Err(EngineError::Unsupported(alloc::format!(
5393                    "WITH RECURSIVE {:?}: produced more than {MAX_TOTAL_ROWS} rows — likely runaway recursion",
5394                    cte.name
5395                )));
5396            }
5397            if iter + 1 == MAX_ITERATIONS {
5398                return Err(EngineError::Unsupported(alloc::format!(
5399                    "WITH RECURSIVE {:?}: exceeded {MAX_ITERATIONS} iterations",
5400                    cte.name
5401                )));
5402            }
5403        }
5404        Ok((columns, all_rows))
5405    }
5406
5407    fn resolve_select_subqueries(
5408        &self,
5409        stmt: &mut SelectStatement,
5410        cancel: CancelToken<'_>,
5411    ) -> Result<(), EngineError> {
5412        for item in &mut stmt.items {
5413            if let SelectItem::Expr { expr, .. } = item {
5414                self.resolve_expr_subqueries(expr, cancel)?;
5415            }
5416        }
5417        if let Some(w) = &mut stmt.where_ {
5418            self.resolve_expr_subqueries(w, cancel)?;
5419        }
5420        if let Some(gs) = &mut stmt.group_by {
5421            for g in gs {
5422                self.resolve_expr_subqueries(g, cancel)?;
5423            }
5424        }
5425        if let Some(h) = &mut stmt.having {
5426            self.resolve_expr_subqueries(h, cancel)?;
5427        }
5428        for o in &mut stmt.order_by {
5429            self.resolve_expr_subqueries(&mut o.expr, cancel)?;
5430        }
5431        for (_, peer) in &mut stmt.unions {
5432            self.resolve_select_subqueries(peer, cancel)?;
5433        }
5434        Ok(())
5435    }
5436
5437    #[allow(clippy::only_used_in_recursion)] // engine handle reads aren't really pure
5438    fn resolve_expr_subqueries(
5439        &self,
5440        e: &mut Expr,
5441        cancel: CancelToken<'_>,
5442    ) -> Result<(), EngineError> {
5443        // Replace-on-this-node cases first.
5444        if let Some(replacement) = self.subquery_replacement(e, cancel)? {
5445            *e = replacement;
5446            return Ok(());
5447        }
5448        match e {
5449            Expr::Binary { lhs, rhs, .. } => {
5450                self.resolve_expr_subqueries(lhs, cancel)?;
5451                self.resolve_expr_subqueries(rhs, cancel)?;
5452            }
5453            Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
5454                self.resolve_expr_subqueries(expr, cancel)?;
5455            }
5456            Expr::FunctionCall { args, .. } => {
5457                for a in args {
5458                    self.resolve_expr_subqueries(a, cancel)?;
5459                }
5460            }
5461            Expr::Like { expr, pattern, .. } => {
5462                self.resolve_expr_subqueries(expr, cancel)?;
5463                self.resolve_expr_subqueries(pattern, cancel)?;
5464            }
5465            Expr::Extract { source, .. } => self.resolve_expr_subqueries(source, cancel)?,
5466            // v4.12 window functions — recurse into args + ORDER BY
5467            // + PARTITION BY in case they carry inner subqueries.
5468            Expr::WindowFunction {
5469                args,
5470                partition_by,
5471                order_by,
5472                ..
5473            } => {
5474                for a in args {
5475                    self.resolve_expr_subqueries(a, cancel)?;
5476                }
5477                for p in partition_by {
5478                    self.resolve_expr_subqueries(p, cancel)?;
5479                }
5480                for (e, _) in order_by {
5481                    self.resolve_expr_subqueries(e, cancel)?;
5482                }
5483            }
5484            // Subquery nodes are handled in subquery_replacement
5485            // (which returned None — defensive no-op); Literal /
5486            // Column are leaves.
5487            Expr::ScalarSubquery(_)
5488            | Expr::Exists { .. }
5489            | Expr::InSubquery { .. }
5490            | Expr::Literal(_)
5491            | Expr::Placeholder(_)
5492            | Expr::Column(_) => {}
5493            // v7.10.10 — recurse children.
5494            Expr::Array(items) => {
5495                for elem in items {
5496                    self.resolve_expr_subqueries(elem, cancel)?;
5497                }
5498            }
5499            Expr::ArraySubscript { target, index } => {
5500                self.resolve_expr_subqueries(target, cancel)?;
5501                self.resolve_expr_subqueries(index, cancel)?;
5502            }
5503            Expr::AnyAll { expr, array, .. } => {
5504                self.resolve_expr_subqueries(expr, cancel)?;
5505                self.resolve_expr_subqueries(array, cancel)?;
5506            }
5507        }
5508        Ok(())
5509    }
5510
5511    /// v4.23: per-row eval that handles correlated subqueries.
5512    /// Equivalent to `eval::eval_expr` when the expression has no
5513    /// subqueries; otherwise clones the expression, substitutes
5514    /// outer-row columns into each surviving subquery node, runs
5515    /// the inner SELECT, and replaces the node with the literal
5516    /// result. Only the WHERE-filter call sites use this path so
5517    /// the uncorrelated fast path is preserved everywhere else.
5518    fn eval_expr_with_correlated(
5519        &self,
5520        expr: &Expr,
5521        row: &Row,
5522        ctx: &EvalContext<'_>,
5523        cancel: CancelToken<'_>,
5524        memo: Option<&mut memoize::MemoizeCache>,
5525    ) -> Result<Value, EngineError> {
5526        if !expr_has_subquery(expr) {
5527            return eval::eval_expr(expr, row, ctx).map_err(EngineError::Eval);
5528        }
5529        let mut e = expr.clone();
5530        self.resolve_correlated_in_expr(&mut e, row, ctx, cancel, memo)?;
5531        eval::eval_expr(&e, row, ctx).map_err(EngineError::Eval)
5532    }
5533
5534    fn resolve_correlated_in_expr(
5535        &self,
5536        e: &mut Expr,
5537        row: &Row,
5538        ctx: &EvalContext<'_>,
5539        cancel: CancelToken<'_>,
5540        mut memo: Option<&mut memoize::MemoizeCache>,
5541    ) -> Result<(), EngineError> {
5542        match e {
5543            Expr::ScalarSubquery(inner) => {
5544                // v6.2.6 — Memoize: build the cache key from the
5545                // pre-substitution subquery repr + the outer row's
5546                // values. Two outer rows with identical correlated
5547                // values hit the same entry.
5548                let cache_key = memo.as_ref().map(|_| memoize::CacheKey {
5549                    subquery_repr: alloc::format!("{}", **inner),
5550                    outer_values: row.values.clone(),
5551                });
5552                if let (Some(cache), Some(k)) = (memo.as_deref_mut(), cache_key.as_ref())
5553                    && let Some(cached) = cache.get(k)
5554                {
5555                    *e = value_to_literal_expr(cached)?;
5556                    return Ok(());
5557                }
5558                let mut s = (**inner).clone();
5559                substitute_outer_columns(&mut s, row, ctx);
5560                let r = self.exec_select_cancel(&s, cancel)?;
5561                let QueryResult::Rows { rows, .. } = r else {
5562                    return Err(EngineError::Unsupported(
5563                        "scalar subquery: inner did not return rows".into(),
5564                    ));
5565                };
5566                let value = match rows.as_slice() {
5567                    [] => Value::Null,
5568                    [r0] => r0.values.first().cloned().unwrap_or(Value::Null),
5569                    _ => {
5570                        return Err(EngineError::Unsupported(alloc::format!(
5571                            "scalar subquery returned {} rows; expected 0 or 1",
5572                            rows.len()
5573                        )));
5574                    }
5575                };
5576                if let (Some(cache), Some(k)) = (memo.as_deref_mut(), cache_key) {
5577                    cache.insert(k, value.clone());
5578                }
5579                *e = value_to_literal_expr(value)?;
5580            }
5581            Expr::Exists { subquery, negated } => {
5582                let mut s = (**subquery).clone();
5583                substitute_outer_columns(&mut s, row, ctx);
5584                let r = self.exec_select_cancel(&s, cancel)?;
5585                let exists = matches!(r, QueryResult::Rows { rows, .. } if !rows.is_empty());
5586                let bit = if *negated { !exists } else { exists };
5587                *e = Expr::Literal(Literal::Bool(bit));
5588            }
5589            Expr::InSubquery {
5590                expr: lhs,
5591                subquery,
5592                negated,
5593            } => {
5594                self.resolve_correlated_in_expr(lhs, row, ctx, cancel, memo.as_deref_mut())?;
5595                let lhs_val = eval::eval_expr(lhs, row, ctx).map_err(EngineError::Eval)?;
5596                let mut s = (**subquery).clone();
5597                substitute_outer_columns(&mut s, row, ctx);
5598                let r = self.exec_select_cancel(&s, cancel)?;
5599                let QueryResult::Rows { columns, rows, .. } = r else {
5600                    return Err(EngineError::Unsupported(
5601                        "IN-subquery: inner did not return rows".into(),
5602                    ));
5603                };
5604                if columns.len() != 1 {
5605                    return Err(EngineError::Unsupported(alloc::format!(
5606                        "IN-subquery must project exactly one column; got {}",
5607                        columns.len()
5608                    )));
5609                }
5610                let mut found = false;
5611                let mut any_null = false;
5612                for r0 in rows {
5613                    let v = r0.values.into_iter().next().unwrap_or(Value::Null);
5614                    if v.is_null() {
5615                        any_null = true;
5616                        continue;
5617                    }
5618                    if value_cmp(&v, &lhs_val) == core::cmp::Ordering::Equal {
5619                        found = true;
5620                        break;
5621                    }
5622                }
5623                let bit = if found {
5624                    !*negated
5625                } else if any_null {
5626                    return Err(EngineError::Unsupported(
5627                        "IN-subquery with NULL in result and no match: NULL semantics not yet implemented".into(),
5628                    ));
5629                } else {
5630                    *negated
5631                };
5632                *e = Expr::Literal(Literal::Bool(bit));
5633            }
5634            Expr::Binary { lhs, rhs, .. } => {
5635                self.resolve_correlated_in_expr(lhs, row, ctx, cancel, memo.as_deref_mut())?;
5636                self.resolve_correlated_in_expr(rhs, row, ctx, cancel, memo.as_deref_mut())?;
5637            }
5638            Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
5639                self.resolve_correlated_in_expr(expr, row, ctx, cancel, memo.as_deref_mut())?;
5640            }
5641            Expr::Like { expr, pattern, .. } => {
5642                self.resolve_correlated_in_expr(expr, row, ctx, cancel, memo.as_deref_mut())?;
5643                self.resolve_correlated_in_expr(pattern, row, ctx, cancel, memo.as_deref_mut())?;
5644            }
5645            Expr::FunctionCall { args, .. } => {
5646                for a in args {
5647                    self.resolve_correlated_in_expr(a, row, ctx, cancel, memo.as_deref_mut())?;
5648                }
5649            }
5650            Expr::Extract { source, .. } => {
5651                self.resolve_correlated_in_expr(source, row, ctx, cancel, memo.as_deref_mut())?;
5652            }
5653            Expr::WindowFunction { .. }
5654            | Expr::Literal(_)
5655            | Expr::Placeholder(_)
5656            | Expr::Column(_) => {}
5657            // v7.10.10 — recurse children.
5658            Expr::Array(items) => {
5659                for elem in items {
5660                    self.resolve_correlated_in_expr(elem, row, ctx, cancel, memo.as_deref_mut())?;
5661                }
5662            }
5663            Expr::ArraySubscript { target, index } => {
5664                self.resolve_correlated_in_expr(target, row, ctx, cancel, memo.as_deref_mut())?;
5665                self.resolve_correlated_in_expr(index, row, ctx, cancel, memo.as_deref_mut())?;
5666            }
5667            Expr::AnyAll { expr, array, .. } => {
5668                self.resolve_correlated_in_expr(expr, row, ctx, cancel, memo.as_deref_mut())?;
5669                self.resolve_correlated_in_expr(array, row, ctx, cancel, memo.as_deref_mut())?;
5670            }
5671        }
5672        Ok(())
5673    }
5674
5675    fn subquery_replacement(
5676        &self,
5677        e: &Expr,
5678        cancel: CancelToken<'_>,
5679    ) -> Result<Option<Expr>, EngineError> {
5680        match e {
5681            Expr::ScalarSubquery(inner) => {
5682                let mut s = (**inner).clone();
5683                // Recurse into the inner SELECT first so nested
5684                // subqueries materialise bottom-up.
5685                self.resolve_select_subqueries(&mut s, cancel)?;
5686                let r = match self.exec_bare_select_cancel(&s, cancel) {
5687                    Ok(r) => r,
5688                    Err(e) if is_correlation_error(&e) => return Ok(None),
5689                    Err(e) => return Err(e),
5690                };
5691                let QueryResult::Rows { rows, .. } = r else {
5692                    return Err(EngineError::Unsupported(
5693                        "scalar subquery: inner statement did not return rows".into(),
5694                    ));
5695                };
5696                let value = match rows.as_slice() {
5697                    [] => Value::Null,
5698                    [row] => row.values.first().cloned().unwrap_or(Value::Null),
5699                    _ => {
5700                        return Err(EngineError::Unsupported(alloc::format!(
5701                            "scalar subquery returned {} rows; expected 0 or 1",
5702                            rows.len()
5703                        )));
5704                    }
5705                };
5706                Ok(Some(value_to_literal_expr(value)?))
5707            }
5708            Expr::Exists { subquery, negated } => {
5709                let mut s = (**subquery).clone();
5710                self.resolve_select_subqueries(&mut s, cancel)?;
5711                let r = match self.exec_bare_select_cancel(&s, cancel) {
5712                    Ok(r) => r,
5713                    Err(e) if is_correlation_error(&e) => return Ok(None),
5714                    Err(e) => return Err(e),
5715                };
5716                let exists = match r {
5717                    QueryResult::Rows { rows, .. } => !rows.is_empty(),
5718                    QueryResult::CommandOk { .. } => false,
5719                };
5720                let bit = if *negated { !exists } else { exists };
5721                Ok(Some(Expr::Literal(Literal::Bool(bit))))
5722            }
5723            Expr::InSubquery {
5724                expr,
5725                subquery,
5726                negated,
5727            } => {
5728                let mut s = (**subquery).clone();
5729                self.resolve_select_subqueries(&mut s, cancel)?;
5730                let r = match self.exec_bare_select_cancel(&s, cancel) {
5731                    Ok(r) => r,
5732                    Err(e) if is_correlation_error(&e) => return Ok(None),
5733                    Err(e) => return Err(e),
5734                };
5735                let QueryResult::Rows { columns, rows, .. } = r else {
5736                    return Err(EngineError::Unsupported(
5737                        "IN-subquery: inner statement did not return rows".into(),
5738                    ));
5739                };
5740                if columns.len() != 1 {
5741                    return Err(EngineError::Unsupported(alloc::format!(
5742                        "IN-subquery must project exactly one column; got {}",
5743                        columns.len()
5744                    )));
5745                }
5746                // Build the same OR-Eq chain the parse-time literal-list
5747                // path constructs, with each value lifted into a Literal.
5748                let mut acc: Option<Expr> = None;
5749                for row in rows {
5750                    let v = row.values.into_iter().next().unwrap_or(Value::Null);
5751                    let lit = value_to_literal_expr(v)?;
5752                    let cmp = Expr::Binary {
5753                        lhs: expr.clone(),
5754                        op: BinOp::Eq,
5755                        rhs: Box::new(lit),
5756                    };
5757                    acc = Some(match acc {
5758                        None => cmp,
5759                        Some(prev) => Expr::Binary {
5760                            lhs: Box::new(prev),
5761                            op: BinOp::Or,
5762                            rhs: Box::new(cmp),
5763                        },
5764                    });
5765                }
5766                let combined = acc.unwrap_or(Expr::Literal(Literal::Bool(false)));
5767                let final_expr = if *negated {
5768                    Expr::Unary {
5769                        op: UnOp::Not,
5770                        expr: Box::new(combined),
5771                    }
5772                } else {
5773                    combined
5774                };
5775                Ok(Some(final_expr))
5776            }
5777            _ => Ok(None),
5778        }
5779    }
5780}
5781
5782// ---- v4.12 window-function helpers ----
5783// The (partition-key, order-key, original-index) tuple shape used
5784// across these helpers is intrinsic to the planner. Factoring it
5785// into a typedef adds indirection without making the code clearer,
5786// so several lints are allowed inline on the affected functions
5787// rather than module-wide.
5788
5789/// v4.22: cheap structural scan for `FROM <name>` (qualified or
5790/// not) inside a SELECT — used to verify the anchor of a WITH
5791/// RECURSIVE CTE doesn't recurse into itself. Conservative: walks
5792/// FROM joins, subqueries, and unions.
5793fn select_refers_to(stmt: &SelectStatement, target: &str) -> bool {
5794    if let Some(from) = &stmt.from
5795        && from_refers_to(from, target)
5796    {
5797        return true;
5798    }
5799    for (_, peer) in &stmt.unions {
5800        if select_refers_to(peer, target) {
5801            return true;
5802        }
5803    }
5804    for item in &stmt.items {
5805        if let SelectItem::Expr { expr, .. } = item
5806            && expr_refers_to(expr, target)
5807        {
5808            return true;
5809        }
5810    }
5811    if let Some(w) = &stmt.where_
5812        && expr_refers_to(w, target)
5813    {
5814        return true;
5815    }
5816    false
5817}
5818
5819fn from_refers_to(from: &FromClause, target: &str) -> bool {
5820    if from.primary.name.eq_ignore_ascii_case(target) {
5821        return true;
5822    }
5823    from.joins
5824        .iter()
5825        .any(|j| j.table.name.eq_ignore_ascii_case(target))
5826}
5827
5828fn expr_refers_to(e: &Expr, target: &str) -> bool {
5829    match e {
5830        Expr::ScalarSubquery(s) => select_refers_to(s, target),
5831        Expr::Exists { subquery, .. } | Expr::InSubquery { subquery, .. } => {
5832            select_refers_to(subquery, target)
5833        }
5834        Expr::Binary { lhs, rhs, .. } => expr_refers_to(lhs, target) || expr_refers_to(rhs, target),
5835        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
5836            expr_refers_to(expr, target)
5837        }
5838        Expr::Like { expr, pattern, .. } => {
5839            expr_refers_to(expr, target) || expr_refers_to(pattern, target)
5840        }
5841        Expr::FunctionCall { args, .. } => args.iter().any(|a| expr_refers_to(a, target)),
5842        Expr::Extract { source, .. } => expr_refers_to(source, target),
5843        Expr::WindowFunction {
5844            args,
5845            partition_by,
5846            order_by,
5847            ..
5848        } => {
5849            args.iter().any(|a| expr_refers_to(a, target))
5850                || partition_by.iter().any(|p| expr_refers_to(p, target))
5851                || order_by.iter().any(|(o, _)| expr_refers_to(o, target))
5852        }
5853        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => false,
5854        Expr::Array(items) => items.iter().any(|e| expr_refers_to(e, target)),
5855        Expr::ArraySubscript { target: t, index } => {
5856            expr_refers_to(t, target) || expr_refers_to(index, target)
5857        }
5858        Expr::AnyAll { expr, array, .. } => {
5859            expr_refers_to(expr, target) || expr_refers_to(array, target)
5860        }
5861    }
5862}
5863
5864/// v4.22: pick more specific column types from observed rows when
5865/// the projection builder defaulted to Text (the v1.x behavior for
5866/// non-column expressions). Lets `WITH t(n) AS (SELECT 1 ...)`
5867/// land an Int column in the CTE storage table rather than failing
5868/// the insert with "expected TEXT, got INT".
5869fn infer_column_types(columns: &[ColumnSchema], rows: &[Row]) -> Vec<ColumnSchema> {
5870    let mut out = columns.to_vec();
5871    for (col_idx, col) in out.iter_mut().enumerate() {
5872        if col.ty != DataType::Text {
5873            continue;
5874        }
5875        let mut inferred: Option<DataType> = None;
5876        let mut all_null = true;
5877        for row in rows {
5878            let Some(v) = row.values.get(col_idx) else {
5879                continue;
5880            };
5881            let ty = match v {
5882                Value::Null => continue,
5883                Value::SmallInt(_) => DataType::SmallInt,
5884                Value::Int(_) => DataType::Int,
5885                Value::BigInt(_) => DataType::BigInt,
5886                Value::Float(_) => DataType::Float,
5887                Value::Bool(_) => DataType::Bool,
5888                Value::Vector(_) => DataType::Vector {
5889                    dim: 0,
5890                    encoding: VecEncoding::F32,
5891                },
5892                _ => DataType::Text,
5893            };
5894            all_null = false;
5895            inferred = Some(match inferred {
5896                None => ty,
5897                Some(prev) if prev == ty => prev,
5898                Some(_) => DataType::Text,
5899            });
5900        }
5901        if let Some(t) = inferred {
5902            col.ty = t;
5903            col.nullable = true;
5904        } else if all_null {
5905            col.nullable = true;
5906        }
5907    }
5908    out
5909}
5910
5911/// v4.26: render a human-readable plan tree for `EXPLAIN <select>`.
5912/// Lines are pushed into `out`; `depth` controls indentation. We
5913/// describe the rewritten SELECT — what the executor *would* do —
5914/// using the engine handle to spot indexed lookups and table shapes.
5915#[allow(clippy::too_many_lines, clippy::format_push_string)]
5916/// v6.2.4 — Walk every line of the rendered plan tree and append
5917/// per-operator stats. Lines that name a known operator get
5918/// `(rows=N)` (`actual_rows` of the top-level operator equals the
5919/// final result row count; scans report their catalog row count
5920/// as the rows-considered metric). Other lines — Filter / Join /
5921/// GroupBy / OrderBy etc. — are marked `(—)` so the surface is
5922/// complete-by-construction; v6.2.5 fills these in via inline
5923/// executor counters.
5924/// v6.8.3 — surface "CREATE INDEX …" suggestions for every
5925/// `(table, column)` pair the query touches via WHERE / JOIN
5926/// that doesn't already have an index on the owning table.
5927/// Walks the SELECT's FROM clauses + WHERE expression tree;
5928/// returns one line per missing index. Deterministic order:
5929/// FROM-clause iteration order, then column-reference walk
5930/// order inside each WHERE. Each suggestion is a copy-pastable
5931/// DDL string.
5932fn build_index_suggestions(stmt: &SelectStatement, engine: &Engine) -> Vec<String> {
5933    use alloc::collections::BTreeSet;
5934    let mut seen: BTreeSet<(String, String)> = BTreeSet::new();
5935    let mut out: Vec<String> = Vec::new();
5936    let cat = engine.active_catalog();
5937    // Build a (table, qualifier-or-alias) list from the FROM clause
5938    // so unqualified column refs in WHERE resolve to the correct
5939    // table.
5940    let Some(from) = &stmt.from else {
5941        return out;
5942    };
5943    let mut tables: Vec<String> = Vec::new();
5944    tables.push(from.primary.name.clone());
5945    for j in &from.joins {
5946        tables.push(j.table.name.clone());
5947    }
5948    // Collect column refs from the WHERE expression. JOIN ON
5949    // predicates also feed in.
5950    let mut col_refs: Vec<spg_sql::ast::ColumnName> = Vec::new();
5951    if let Some(w) = &stmt.where_ {
5952        collect_column_refs(w, &mut col_refs);
5953    }
5954    for j in &from.joins {
5955        if let Some(on) = &j.on {
5956            collect_column_refs(on, &mut col_refs);
5957        }
5958    }
5959    for cn in &col_refs {
5960        // Resolve owner table: explicit qualifier first, else
5961        // first table in FROM that has a column of this name.
5962        let owner: Option<String> = if let Some(q) = &cn.qualifier {
5963            tables.iter().find(|t| t == &q).cloned()
5964        } else {
5965            tables.iter().find_map(|t| {
5966                cat.get(t).and_then(|tbl| {
5967                    if tbl.schema().column_position(&cn.name).is_some() {
5968                        Some(t.clone())
5969                    } else {
5970                        None
5971                    }
5972                })
5973            })
5974        };
5975        let Some(owner) = owner else {
5976            continue;
5977        };
5978        let Some(tbl) = cat.get(&owner) else {
5979            continue;
5980        };
5981        let Some(col_pos) = tbl.schema().column_position(&cn.name) else {
5982            continue;
5983        };
5984        // Skip if any BTree index already covers this column as
5985        // its key.
5986        let already_indexed = tbl.indices().iter().any(|i| {
5987            matches!(i.kind, spg_storage::IndexKind::BTree(_))
5988                && i.column_position == col_pos
5989                && i.expression.is_none()
5990                && i.partial_predicate.is_none()
5991        });
5992        if already_indexed {
5993            continue;
5994        }
5995        if seen.insert((owner.clone(), cn.name.clone())) {
5996            out.push(alloc::format!(
5997                "SUGGEST: CREATE INDEX ix_{}_{} ON {} ({})",
5998                owner,
5999                cn.name,
6000                owner,
6001                cn.name
6002            ));
6003        }
6004    }
6005    out
6006}
6007
6008/// Walks an `Expr` and pushes every `ColumnName` it references.
6009/// Order is depth-first, left-to-right.
6010fn collect_column_refs(expr: &Expr, out: &mut Vec<spg_sql::ast::ColumnName>) {
6011    match expr {
6012        Expr::Column(cn) => out.push(cn.clone()),
6013        Expr::FunctionCall { args, .. } => {
6014            for a in args {
6015                collect_column_refs(a, out);
6016            }
6017        }
6018        Expr::Binary { lhs, rhs, .. } => {
6019            collect_column_refs(lhs, out);
6020            collect_column_refs(rhs, out);
6021        }
6022        Expr::Unary { expr: e, .. } => collect_column_refs(e, out),
6023        _ => {}
6024    }
6025}
6026
6027fn annotate_explain_lines(lines: &mut [String], total_rows: usize, engine: &Engine) {
6028    let catalog = engine.active_catalog();
6029    let cold_ids = catalog.cold_segment_ids_global();
6030    let any_cold = !cold_ids.is_empty();
6031    let cold_ids_repr = if any_cold {
6032        let mut s = alloc::string::String::from("[");
6033        for (i, id) in cold_ids.iter().enumerate() {
6034            if i > 0 {
6035                s.push(',');
6036            }
6037            s.push_str(&alloc::format!("{id}"));
6038        }
6039        s.push(']');
6040        s
6041    } else {
6042        alloc::string::String::new()
6043    };
6044    for (idx, line) in lines.iter_mut().enumerate() {
6045        let trimmed = line.trim_start();
6046        let is_top_level = idx == 0;
6047        if is_top_level {
6048            line.push_str(&alloc::format!(" (rows={total_rows})"));
6049            continue;
6050        }
6051        if let Some(rest) = trimmed.strip_prefix("From: ") {
6052            let (name, scan_kind) = match rest.split_once(" [") {
6053                Some((n, k)) => (n.trim(), k.trim_end_matches(']')),
6054                None => (rest.trim(), ""),
6055            };
6056            let bare = name.split_whitespace().next().unwrap_or(name);
6057            let hot = catalog.get(bare).map(|t| t.rows().len());
6058            // v6.2.7 — `cold_segments=[id0,id1,…]` enumerates every
6059            // cold-tier segment the scan COULD have walked. v6.2.x
6060            // can tighten to per-table by walking the table's
6061            // BTree-index cold locators.
6062            let annot = match (hot, scan_kind) {
6063                (Some(h), "full scan") => {
6064                    let mut s = alloc::format!(" (hot_rows={h}");
6065                    if any_cold {
6066                        s.push_str(&alloc::format!(
6067                            ", cold_tier=present, cold_segments={cold_ids_repr}"
6068                        ));
6069                    }
6070                    s.push(')');
6071                    s
6072                }
6073                (Some(h), "index seek") => {
6074                    let mut s = alloc::format!(" (hot_rows≤{h}");
6075                    if any_cold {
6076                        s.push_str(&alloc::format!(
6077                            ", cold_tier=present, cold_segments={cold_ids_repr}"
6078                        ));
6079                    }
6080                    s.push(')');
6081                    s
6082                }
6083                _ => " (rows=—)".to_string(),
6084            };
6085            line.push_str(&annot);
6086            continue;
6087        }
6088        // Filter / GroupBy / Having / OrderBy / Limit / Join etc.
6089        line.push_str(" (rows=—)");
6090    }
6091}
6092
6093fn explain_select(stmt: &SelectStatement, engine: &Engine, depth: usize, out: &mut Vec<String>) {
6094    let pad = "  ".repeat(depth);
6095    // 1) Top-level operator label.
6096    let top = if !stmt.ctes.is_empty() {
6097        if stmt.ctes.iter().any(|c| c.recursive) {
6098            "CTEScan (WITH RECURSIVE)"
6099        } else {
6100            "CTEScan (WITH)"
6101        }
6102    } else if !stmt.unions.is_empty() {
6103        "UnionScan"
6104    } else if select_has_window(stmt) {
6105        "WindowAgg"
6106    } else if aggregate::uses_aggregate(stmt) {
6107        "Aggregate"
6108    } else if stmt.distinct {
6109        "Distinct"
6110    } else if stmt.from.is_some() {
6111        "TableScan"
6112    } else {
6113        "Result"
6114    };
6115    out.push(alloc::format!("{pad}{top}"));
6116    let child = "  ".repeat(depth + 1);
6117    // 2) CTE bodies.
6118    for cte in &stmt.ctes {
6119        let head = if cte.recursive {
6120            alloc::format!("{child}CTE (recursive): {}", cte.name)
6121        } else {
6122            alloc::format!("{child}CTE: {}", cte.name)
6123        };
6124        out.push(head);
6125        explain_select(&cte.body, engine, depth + 2, out);
6126    }
6127    // 3) FROM details — primary table + joins, index hits.
6128    if let Some(from) = &stmt.from {
6129        let mut tag = alloc::format!("{child}From: {}", from.primary.name);
6130        if let Some(alias) = &from.primary.alias {
6131            tag.push_str(&alloc::format!(" AS {alias}"));
6132        }
6133        // Try to detect an index-seek opportunity on WHERE against
6134        // the primary table — same heuristic the executor uses.
6135        if let Some(w) = &stmt.where_
6136            && let Some(table) = engine.active_catalog().get(&from.primary.name)
6137        {
6138            let alias = from.primary.alias.as_deref().unwrap_or(&from.primary.name);
6139            let cols = &table.schema().columns;
6140            if try_index_seek(w, cols, engine.active_catalog(), table, alias).is_some() {
6141                tag.push_str(" [index seek]");
6142            } else {
6143                tag.push_str(" [full scan]");
6144            }
6145        } else {
6146            tag.push_str(" [full scan]");
6147        }
6148        out.push(tag);
6149        for j in &from.joins {
6150            let kind = match j.kind {
6151                spg_sql::ast::JoinKind::Inner => "INNER JOIN",
6152                spg_sql::ast::JoinKind::Left => "LEFT JOIN",
6153                spg_sql::ast::JoinKind::Cross => "CROSS JOIN",
6154            };
6155            let mut s = alloc::format!("{child}{kind}: {}", j.table.name);
6156            if let Some(alias) = &j.table.alias {
6157                s.push_str(&alloc::format!(" AS {alias}"));
6158            }
6159            if j.on.is_some() {
6160                s.push_str(" (ON …)");
6161            }
6162            out.push(s);
6163        }
6164    }
6165    // 4) WHERE / GROUP BY / HAVING / ORDER BY / LIMIT / OFFSET.
6166    if let Some(w) = &stmt.where_ {
6167        let mut s = alloc::format!("{child}Filter: {w}");
6168        if expr_has_subquery(w) {
6169            s.push_str(" [subquery]");
6170        }
6171        out.push(s);
6172    }
6173    if let Some(gs) = &stmt.group_by {
6174        let mut parts = Vec::new();
6175        for g in gs {
6176            parts.push(alloc::format!("{g}"));
6177        }
6178        out.push(alloc::format!("{child}GroupBy: {}", parts.join(", ")));
6179    }
6180    if let Some(h) = &stmt.having {
6181        out.push(alloc::format!("{child}Having: {h}"));
6182    }
6183    for o in &stmt.order_by {
6184        let dir = if o.desc { "DESC" } else { "ASC" };
6185        out.push(alloc::format!("{child}OrderBy: {} {dir}", o.expr));
6186    }
6187    if let Some(lim) = stmt.limit {
6188        out.push(alloc::format!("{child}Limit: {lim}"));
6189    }
6190    if let Some(off) = stmt.offset {
6191        out.push(alloc::format!("{child}Offset: {off}"));
6192    }
6193    // 5) Projection — collapse Wildcard or render N items.
6194    if stmt
6195        .items
6196        .iter()
6197        .any(|it| matches!(it, SelectItem::Wildcard))
6198    {
6199        out.push(alloc::format!("{child}Project: *"));
6200    } else {
6201        out.push(alloc::format!(
6202            "{child}Project: {} item(s)",
6203            stmt.items.len()
6204        ));
6205    }
6206    // 6) Recurse into UNION peers.
6207    for (kind, peer) in &stmt.unions {
6208        let label = match kind {
6209            UnionKind::All => "UNION ALL",
6210            UnionKind::Distinct => "UNION",
6211        };
6212        out.push(alloc::format!("{child}{label}"));
6213        explain_select(peer, engine, depth + 2, out);
6214    }
6215}
6216
6217/// v4.23: recognise the engine errors that indicate the inner
6218/// SELECT couldn't be evaluated in isolation because it references
6219/// an outer column — used by `subquery_replacement` to skip
6220/// materialisation and let row-eval handle it instead.
6221fn is_correlation_error(e: &EngineError) -> bool {
6222    matches!(
6223        e,
6224        EngineError::Eval(
6225            eval::EvalError::ColumnNotFound { .. } | eval::EvalError::UnknownQualifier { .. }
6226        )
6227    )
6228}
6229
6230/// v4.23: walk every Expr in `stmt` and replace each Column ref
6231/// that targets the outer scope (qualifier matches the outer
6232/// table alias) with a Literal carrying the outer row's value.
6233/// Conservative: only qualified refs are substituted, so the user
6234/// must write `outer_alias.col` to reference an outer column. This
6235/// matches PG's lexical scoping for correlated subqueries and
6236/// avoids accidentally rebinding inner columns of the same name.
6237fn substitute_outer_columns(stmt: &mut SelectStatement, row: &Row, ctx: &EvalContext<'_>) {
6238    let Some(outer_alias) = ctx.table_alias else {
6239        return;
6240    };
6241    substitute_in_select(stmt, row, ctx, outer_alias);
6242}
6243
6244fn substitute_in_select(
6245    stmt: &mut SelectStatement,
6246    row: &Row,
6247    ctx: &EvalContext<'_>,
6248    outer_alias: &str,
6249) {
6250    for item in &mut stmt.items {
6251        if let SelectItem::Expr { expr, .. } = item {
6252            substitute_in_expr(expr, row, ctx, outer_alias);
6253        }
6254    }
6255    if let Some(w) = &mut stmt.where_ {
6256        substitute_in_expr(w, row, ctx, outer_alias);
6257    }
6258    if let Some(gs) = &mut stmt.group_by {
6259        for g in gs {
6260            substitute_in_expr(g, row, ctx, outer_alias);
6261        }
6262    }
6263    if let Some(h) = &mut stmt.having {
6264        substitute_in_expr(h, row, ctx, outer_alias);
6265    }
6266    for o in &mut stmt.order_by {
6267        substitute_in_expr(&mut o.expr, row, ctx, outer_alias);
6268    }
6269    for (_, peer) in &mut stmt.unions {
6270        substitute_in_select(peer, row, ctx, outer_alias);
6271    }
6272}
6273
6274fn substitute_in_expr(e: &mut Expr, row: &Row, ctx: &EvalContext<'_>, outer_alias: &str) {
6275    if let Expr::Column(c) = e
6276        && let Some(qual) = &c.qualifier
6277        && qual.eq_ignore_ascii_case(outer_alias)
6278    {
6279        // Look up the column's index in the outer schema.
6280        if let Some(idx) = ctx
6281            .columns
6282            .iter()
6283            .position(|sc| sc.name.eq_ignore_ascii_case(&c.name))
6284        {
6285            let v = row.values.get(idx).cloned().unwrap_or(Value::Null);
6286            if let Ok(lit) = value_to_literal_expr(v) {
6287                *e = lit;
6288                return;
6289            }
6290        }
6291    }
6292    match e {
6293        Expr::Binary { lhs, rhs, .. } => {
6294            substitute_in_expr(lhs, row, ctx, outer_alias);
6295            substitute_in_expr(rhs, row, ctx, outer_alias);
6296        }
6297        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
6298            substitute_in_expr(expr, row, ctx, outer_alias);
6299        }
6300        Expr::Like { expr, pattern, .. } => {
6301            substitute_in_expr(expr, row, ctx, outer_alias);
6302            substitute_in_expr(pattern, row, ctx, outer_alias);
6303        }
6304        Expr::FunctionCall { args, .. } => {
6305            for a in args {
6306                substitute_in_expr(a, row, ctx, outer_alias);
6307            }
6308        }
6309        Expr::Extract { source, .. } => substitute_in_expr(source, row, ctx, outer_alias),
6310        Expr::WindowFunction {
6311            args,
6312            partition_by,
6313            order_by,
6314            ..
6315        } => {
6316            for a in args {
6317                substitute_in_expr(a, row, ctx, outer_alias);
6318            }
6319            for p in partition_by {
6320                substitute_in_expr(p, row, ctx, outer_alias);
6321            }
6322            for (o, _) in order_by {
6323                substitute_in_expr(o, row, ctx, outer_alias);
6324            }
6325        }
6326        Expr::ScalarSubquery(s) => substitute_in_select(s, row, ctx, outer_alias),
6327        Expr::Exists { subquery, .. } | Expr::InSubquery { subquery, .. } => {
6328            substitute_in_select(subquery, row, ctx, outer_alias);
6329        }
6330        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => {}
6331        Expr::Array(items) => {
6332            for elem in items {
6333                substitute_in_expr(elem, row, ctx, outer_alias);
6334            }
6335        }
6336        Expr::ArraySubscript { target, index } => {
6337            substitute_in_expr(target, row, ctx, outer_alias);
6338            substitute_in_expr(index, row, ctx, outer_alias);
6339        }
6340        Expr::AnyAll { expr, array, .. } => {
6341            substitute_in_expr(expr, row, ctx, outer_alias);
6342            substitute_in_expr(array, row, ctx, outer_alias);
6343        }
6344    }
6345}
6346
6347/// v4.22: encode a Row to a comparable byte key for UNION-DISTINCT
6348/// dedup inside the recursive iteration. Crude but deterministic
6349/// — Debug prints embed type discriminants so NULL ≠ "" ≠ 0.
6350fn encode_row_key(row: &Row) -> Vec<u8> {
6351    let mut out = Vec::new();
6352    for v in &row.values {
6353        let s = alloc::format!("{v:?}|");
6354        out.extend_from_slice(s.as_bytes());
6355    }
6356    out
6357}
6358
6359fn select_has_window(stmt: &SelectStatement) -> bool {
6360    for item in &stmt.items {
6361        if let SelectItem::Expr { expr, .. } = item
6362            && expr_has_window(expr)
6363        {
6364            return true;
6365        }
6366    }
6367    false
6368}
6369
6370fn expr_has_window(e: &Expr) -> bool {
6371    match e {
6372        Expr::WindowFunction { .. } => true,
6373        Expr::Binary { lhs, rhs, .. } => expr_has_window(lhs) || expr_has_window(rhs),
6374        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
6375            expr_has_window(expr)
6376        }
6377        Expr::FunctionCall { args, .. } => args.iter().any(expr_has_window),
6378        Expr::Like { expr, pattern, .. } => expr_has_window(expr) || expr_has_window(pattern),
6379        Expr::Extract { source, .. } => expr_has_window(source),
6380        Expr::ScalarSubquery(_)
6381        | Expr::Exists { .. }
6382        | Expr::InSubquery { .. }
6383        | Expr::Literal(_)
6384        | Expr::Placeholder(_)
6385        | Expr::Column(_) => false,
6386        Expr::Array(items) => items.iter().any(expr_has_window),
6387        Expr::ArraySubscript { target, index } => expr_has_window(target) || expr_has_window(index),
6388        Expr::AnyAll { expr, array, .. } => expr_has_window(expr) || expr_has_window(array),
6389    }
6390}
6391
6392fn collect_window_nodes(e: &Expr, out: &mut Vec<Expr>) {
6393    if let Expr::WindowFunction { .. } = e {
6394        // Deduplicate by structural equality on the expression
6395        // (cheap because window args + partition + order are
6396        // small). Without dedup we'd recompute identical windows
6397        // once per occurrence in the projection.
6398        if !out.iter().any(|x| x == e) {
6399            out.push(e.clone());
6400        }
6401        return;
6402    }
6403    match e {
6404        // Already handled by the early-return at the top.
6405        Expr::WindowFunction { .. } => unreachable!(),
6406        Expr::Binary { lhs, rhs, .. } => {
6407            collect_window_nodes(lhs, out);
6408            collect_window_nodes(rhs, out);
6409        }
6410        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
6411            collect_window_nodes(expr, out);
6412        }
6413        Expr::FunctionCall { args, .. } => {
6414            for a in args {
6415                collect_window_nodes(a, out);
6416            }
6417        }
6418        Expr::Like { expr, pattern, .. } => {
6419            collect_window_nodes(expr, out);
6420            collect_window_nodes(pattern, out);
6421        }
6422        Expr::Extract { source, .. } => collect_window_nodes(source, out),
6423        _ => {}
6424    }
6425}
6426
6427fn rewrite_window_to_columns(e: &mut Expr, window_nodes: &[Expr]) {
6428    if let Expr::WindowFunction { .. } = e
6429        && let Some(idx) = window_nodes.iter().position(|w| w == e)
6430    {
6431        *e = Expr::Column(spg_sql::ast::ColumnName {
6432            qualifier: None,
6433            name: alloc::format!("__win_{idx}"),
6434        });
6435        return;
6436    }
6437    match e {
6438        Expr::Binary { lhs, rhs, .. } => {
6439            rewrite_window_to_columns(lhs, window_nodes);
6440            rewrite_window_to_columns(rhs, window_nodes);
6441        }
6442        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
6443            rewrite_window_to_columns(expr, window_nodes);
6444        }
6445        Expr::FunctionCall { args, .. } => {
6446            for a in args {
6447                rewrite_window_to_columns(a, window_nodes);
6448            }
6449        }
6450        Expr::Like { expr, pattern, .. } => {
6451            rewrite_window_to_columns(expr, window_nodes);
6452            rewrite_window_to_columns(pattern, window_nodes);
6453        }
6454        Expr::Extract { source, .. } => rewrite_window_to_columns(source, window_nodes),
6455        _ => {}
6456    }
6457}
6458
6459/// Total order over partition-key tuples. NULL sorts as the
6460/// lowest value (matches the `<` partial order's NULL-last
6461/// behaviour with `INFINITY` flipped).
6462fn partition_key_cmp(a: &[Value], b: &[Value]) -> core::cmp::Ordering {
6463    for (x, y) in a.iter().zip(b.iter()) {
6464        let c = value_cmp(x, y);
6465        if c != core::cmp::Ordering::Equal {
6466            return c;
6467        }
6468    }
6469    a.len().cmp(&b.len())
6470}
6471
6472fn order_key_cmp(a: &[(Value, bool)], b: &[(Value, bool)]) -> core::cmp::Ordering {
6473    for ((va, desc), (vb, _)) in a.iter().zip(b.iter()) {
6474        let c = value_cmp(va, vb);
6475        let c = if *desc { c.reverse() } else { c };
6476        if c != core::cmp::Ordering::Equal {
6477            return c;
6478        }
6479    }
6480    a.len().cmp(&b.len())
6481}
6482
6483#[allow(clippy::match_same_arms)] // explicit arms per type document the supported pairs
6484fn value_cmp(a: &Value, b: &Value) -> core::cmp::Ordering {
6485    use core::cmp::Ordering;
6486    match (a, b) {
6487        (Value::Null, Value::Null) => Ordering::Equal,
6488        (Value::Null, _) => Ordering::Less,
6489        (_, Value::Null) => Ordering::Greater,
6490        (Value::Int(x), Value::Int(y)) => x.cmp(y),
6491        (Value::BigInt(x), Value::BigInt(y)) => x.cmp(y),
6492        (Value::SmallInt(x), Value::SmallInt(y)) => x.cmp(y),
6493        (Value::Text(x), Value::Text(y)) => x.cmp(y),
6494        (Value::Bool(x), Value::Bool(y)) => x.cmp(y),
6495        (Value::Float(x), Value::Float(y)) => x.partial_cmp(y).unwrap_or(Ordering::Equal),
6496        (Value::Date(x), Value::Date(y)) => x.cmp(y),
6497        (Value::Timestamp(x), Value::Timestamp(y)) => x.cmp(y),
6498        // Cross-type compare: fall back to the debug rendering —
6499        // same-partition is the goal, exact order is irrelevant.
6500        _ => alloc::format!("{a:?}").cmp(&alloc::format!("{b:?}")),
6501    }
6502}
6503
6504/// Compute the window function's per-row output for one partition.
6505/// `slice` has (partition key, order key, original-row-index)
6506/// tuples already sorted by order key. `filtered_rows` is the
6507/// full row list indexed by original-row-index. `out_vals` is
6508/// the destination, also indexed by original-row-index.
6509#[allow(
6510    clippy::too_many_arguments,
6511    clippy::cast_possible_truncation,
6512    clippy::cast_possible_wrap,
6513    clippy::cast_precision_loss,
6514    clippy::cast_sign_loss,
6515    clippy::doc_markdown,
6516    clippy::too_many_lines,
6517    clippy::type_complexity,
6518    clippy::match_same_arms
6519)]
6520fn compute_window_partition(
6521    name: &str,
6522    args: &[Expr],
6523    ordered: bool,
6524    frame: Option<&WindowFrame>,
6525    null_treatment: spg_sql::ast::NullTreatment,
6526    slice: &[(Vec<Value>, Vec<(Value, bool)>, usize)],
6527    filtered_rows: &[&Row],
6528    ctx: &EvalContext<'_>,
6529    out_vals: &mut [Value],
6530) -> Result<(), EngineError> {
6531    let ignore_nulls = matches!(null_treatment, spg_sql::ast::NullTreatment::Ignore);
6532    let lower = name.to_ascii_lowercase();
6533    match lower.as_str() {
6534        "row_number" => {
6535            for (rank, (_, _, idx)) in slice.iter().enumerate() {
6536                out_vals[*idx] = Value::BigInt((rank + 1) as i64);
6537            }
6538            Ok(())
6539        }
6540        "rank" => {
6541            let mut prev_key: Option<&[(Value, bool)]> = None;
6542            let mut current_rank: i64 = 1;
6543            for (i, (_, okey, idx)) in slice.iter().enumerate() {
6544                if let Some(p) = prev_key
6545                    && order_key_cmp(p, okey) != core::cmp::Ordering::Equal
6546                {
6547                    current_rank = (i + 1) as i64;
6548                }
6549                if prev_key.is_none() {
6550                    current_rank = 1;
6551                }
6552                out_vals[*idx] = Value::BigInt(current_rank);
6553                prev_key = Some(okey.as_slice());
6554            }
6555            Ok(())
6556        }
6557        "dense_rank" => {
6558            let mut prev_key: Option<&[(Value, bool)]> = None;
6559            let mut current_rank: i64 = 0;
6560            for (_, okey, idx) in slice {
6561                if prev_key.is_none_or(|p| order_key_cmp(p, okey) != core::cmp::Ordering::Equal) {
6562                    current_rank += 1;
6563                }
6564                out_vals[*idx] = Value::BigInt(current_rank);
6565                prev_key = Some(okey.as_slice());
6566            }
6567            Ok(())
6568        }
6569        "sum" | "avg" | "min" | "max" | "count" | "count_star" => {
6570            // Pre-evaluate the function arg per row in the slice
6571            // (count_star has no arg).
6572            let arg_values: Vec<Value> = if lower == "count_star" || args.is_empty() {
6573                slice.iter().map(|_| Value::Null).collect()
6574            } else {
6575                slice
6576                    .iter()
6577                    .map(|(_, _, idx)| eval::eval_expr(&args[0], filtered_rows[*idx], ctx))
6578                    .collect::<Result<_, _>>()
6579                    .map_err(EngineError::Eval)?
6580            };
6581            // v4.20: pick the effective frame. Explicit frame
6582            // overrides the implicit default (running for ordered,
6583            // whole-partition for unordered).
6584            let eff = effective_frame(frame, ordered)?;
6585            #[allow(clippy::needless_range_loop)]
6586            for i in 0..slice.len() {
6587                let (lo, hi) = frame_bounds_for_row(&eff, i, slice);
6588                let mut sum: f64 = 0.0;
6589                let mut count: i64 = 0;
6590                let mut min_v: Option<f64> = None;
6591                let mut max_v: Option<f64> = None;
6592                let mut row_count: i64 = 0;
6593                if lo <= hi {
6594                    for j in lo..=hi {
6595                        let v = &arg_values[j];
6596                        match lower.as_str() {
6597                            "count_star" => row_count += 1,
6598                            "count" => {
6599                                if !v.is_null() {
6600                                    count += 1;
6601                                }
6602                            }
6603                            _ => {
6604                                if let Some(x) = value_to_f64(v) {
6605                                    sum += x;
6606                                    count += 1;
6607                                    min_v = Some(min_v.map_or(x, |m| m.min(x)));
6608                                    max_v = Some(max_v.map_or(x, |m| m.max(x)));
6609                                }
6610                            }
6611                        }
6612                    }
6613                }
6614                let value = match lower.as_str() {
6615                    "count_star" => Value::BigInt(row_count),
6616                    "count" => Value::BigInt(count),
6617                    "sum" => Value::Float(sum),
6618                    "avg" => {
6619                        if count == 0 {
6620                            Value::Null
6621                        } else {
6622                            Value::Float(sum / count as f64)
6623                        }
6624                    }
6625                    "min" => min_v.map_or(Value::Null, Value::Float),
6626                    "max" => max_v.map_or(Value::Null, Value::Float),
6627                    _ => unreachable!(),
6628                };
6629                let (_, _, idx) = &slice[i];
6630                out_vals[*idx] = value;
6631            }
6632            Ok(())
6633        }
6634        "lag" | "lead" => {
6635            // lag(expr [, offset [, default]])
6636            // lead(expr [, offset [, default]])
6637            if args.is_empty() {
6638                return Err(EngineError::Unsupported(alloc::format!(
6639                    "{lower}() requires at least one argument"
6640                )));
6641            }
6642            let offset: i64 = if args.len() >= 2 {
6643                let v = eval::eval_expr(&args[1], filtered_rows[slice[0].2], ctx)
6644                    .map_err(EngineError::Eval)?;
6645                match v {
6646                    Value::SmallInt(n) => i64::from(n),
6647                    Value::Int(n) => i64::from(n),
6648                    Value::BigInt(n) => n,
6649                    _ => {
6650                        return Err(EngineError::Unsupported(alloc::format!(
6651                            "{lower}() offset must be integer"
6652                        )));
6653                    }
6654                }
6655            } else {
6656                1
6657            };
6658            let default: Value = if args.len() >= 3 {
6659                eval::eval_expr(&args[2], filtered_rows[slice[0].2], ctx)
6660                    .map_err(EngineError::Eval)?
6661            } else {
6662                Value::Null
6663            };
6664            let values: Vec<Value> = slice
6665                .iter()
6666                .map(|(_, _, idx)| eval::eval_expr(&args[0], filtered_rows[*idx], ctx))
6667                .collect::<Result<_, _>>()
6668                .map_err(EngineError::Eval)?;
6669            let n = slice.len();
6670            for (i, (_, _, idx)) in slice.iter().enumerate() {
6671                let signed_offset = if lower == "lag" { -offset } else { offset };
6672                let v = if ignore_nulls {
6673                    // v6.4.2 — IGNORE NULLS: walk in the offset direction
6674                    // skipping NULL values; the `offset`-th non-NULL
6675                    // encountered is the result.
6676                    let step: i64 = if signed_offset >= 0 { 1 } else { -1 };
6677                    let needed: i64 = signed_offset.abs();
6678                    if needed == 0 {
6679                        values[i].clone()
6680                    } else {
6681                        let mut j: i64 = i as i64;
6682                        let mut hits: i64 = 0;
6683                        let mut found: Option<Value> = None;
6684                        loop {
6685                            j += step;
6686                            if j < 0 || j >= n as i64 {
6687                                break;
6688                            }
6689                            #[allow(clippy::cast_sign_loss)]
6690                            let v = &values[j as usize];
6691                            if !v.is_null() {
6692                                hits += 1;
6693                                if hits == needed {
6694                                    found = Some(v.clone());
6695                                    break;
6696                                }
6697                            }
6698                        }
6699                        found.unwrap_or_else(|| default.clone())
6700                    }
6701                } else {
6702                    let target_signed = i64::try_from(i).unwrap_or(i64::MAX) + signed_offset;
6703                    if target_signed < 0 || target_signed >= i64::try_from(n).unwrap_or(i64::MAX) {
6704                        default.clone()
6705                    } else {
6706                        #[allow(clippy::cast_sign_loss)]
6707                        {
6708                            values[target_signed as usize].clone()
6709                        }
6710                    }
6711                };
6712                out_vals[*idx] = v;
6713            }
6714            Ok(())
6715        }
6716        "first_value" | "last_value" | "nth_value" => {
6717            if args.is_empty() {
6718                return Err(EngineError::Unsupported(alloc::format!(
6719                    "{lower}() requires at least one argument"
6720                )));
6721            }
6722            let values: Vec<Value> = slice
6723                .iter()
6724                .map(|(_, _, idx)| eval::eval_expr(&args[0], filtered_rows[*idx], ctx))
6725                .collect::<Result<_, _>>()
6726                .map_err(EngineError::Eval)?;
6727            let nth: usize = if lower == "nth_value" {
6728                if args.len() < 2 {
6729                    return Err(EngineError::Unsupported(
6730                        "nth_value() requires (expr, n)".into(),
6731                    ));
6732                }
6733                let v = eval::eval_expr(&args[1], filtered_rows[slice[0].2], ctx)
6734                    .map_err(EngineError::Eval)?;
6735                let raw = match v {
6736                    Value::SmallInt(n) => i64::from(n),
6737                    Value::Int(n) => i64::from(n),
6738                    Value::BigInt(n) => n,
6739                    _ => {
6740                        return Err(EngineError::Unsupported(
6741                            "nth_value() n must be integer".into(),
6742                        ));
6743                    }
6744                };
6745                if raw < 1 {
6746                    return Err(EngineError::Unsupported(
6747                        "nth_value() n must be >= 1".into(),
6748                    ));
6749                }
6750                #[allow(clippy::cast_sign_loss)]
6751                {
6752                    raw as usize
6753                }
6754            } else {
6755                0
6756            };
6757            let eff = effective_frame(frame, ordered)?;
6758            for i in 0..slice.len() {
6759                let (lo, hi) = frame_bounds_for_row(&eff, i, slice);
6760                let (_, _, idx) = &slice[i];
6761                let v = if lo > hi {
6762                    Value::Null
6763                } else if ignore_nulls && matches!(lower.as_str(), "first_value" | "last_value") {
6764                    // v6.4.2 — IGNORE NULLS: skip NULL cells when
6765                    // selecting the boundary value within the frame.
6766                    if lower == "first_value" {
6767                        (lo..=hi)
6768                            .find_map(|j| {
6769                                let v = &values[j];
6770                                (!v.is_null()).then(|| v.clone())
6771                            })
6772                            .unwrap_or(Value::Null)
6773                    } else {
6774                        (lo..=hi)
6775                            .rev()
6776                            .find_map(|j| {
6777                                let v = &values[j];
6778                                (!v.is_null()).then(|| v.clone())
6779                            })
6780                            .unwrap_or(Value::Null)
6781                    }
6782                } else {
6783                    match lower.as_str() {
6784                        "first_value" => values[lo].clone(),
6785                        "last_value" => values[hi].clone(),
6786                        "nth_value" => {
6787                            let pos = lo + nth - 1;
6788                            if pos > hi {
6789                                Value::Null
6790                            } else {
6791                                values[pos].clone()
6792                            }
6793                        }
6794                        _ => unreachable!(),
6795                    }
6796                };
6797                out_vals[*idx] = v;
6798            }
6799            Ok(())
6800        }
6801        "ntile" => {
6802            if args.is_empty() {
6803                return Err(EngineError::Unsupported(
6804                    "ntile(n) requires an integer argument".into(),
6805                ));
6806            }
6807            let v = eval::eval_expr(&args[0], filtered_rows[slice[0].2], ctx)
6808                .map_err(EngineError::Eval)?;
6809            let bucket_count: i64 = match v {
6810                Value::SmallInt(n) => i64::from(n),
6811                Value::Int(n) => i64::from(n),
6812                Value::BigInt(n) => n,
6813                _ => {
6814                    return Err(EngineError::Unsupported(
6815                        "ntile() argument must be integer".into(),
6816                    ));
6817                }
6818            };
6819            if bucket_count < 1 {
6820                return Err(EngineError::Unsupported(
6821                    "ntile() argument must be >= 1".into(),
6822                ));
6823            }
6824            #[allow(clippy::cast_sign_loss)]
6825            let buckets = bucket_count as usize;
6826            let n = slice.len();
6827            // Each bucket gets `base` rows; the first `extras` buckets
6828            // get one extra. PG semantics.
6829            let base = n / buckets;
6830            let extras = n % buckets;
6831            let mut bucket: usize = 1;
6832            let mut remaining_in_bucket = if extras > 0 { base + 1 } else { base };
6833            let mut buckets_with_extra_remaining = extras;
6834            for (_, _, idx) in slice {
6835                if remaining_in_bucket == 0 {
6836                    bucket += 1;
6837                    buckets_with_extra_remaining = buckets_with_extra_remaining.saturating_sub(1);
6838                    remaining_in_bucket = if buckets_with_extra_remaining > 0 {
6839                        base + 1
6840                    } else {
6841                        base
6842                    };
6843                    // Edge: if base==0 and extras==0, all rows fit;
6844                    // shouldn't reach here, but guard anyway.
6845                    if remaining_in_bucket == 0 {
6846                        remaining_in_bucket = 1;
6847                    }
6848                }
6849                out_vals[*idx] = Value::BigInt(i64::try_from(bucket).unwrap_or(i64::MAX));
6850                remaining_in_bucket -= 1;
6851            }
6852            Ok(())
6853        }
6854        "percent_rank" => {
6855            // (rank - 1) / (n - 1) where rank is the standard RANK().
6856            // Single-row partitions get 0.
6857            let n = slice.len();
6858            let mut prev_key: Option<&[(Value, bool)]> = None;
6859            let mut current_rank: i64 = 1;
6860            for (i, (_, okey, idx)) in slice.iter().enumerate() {
6861                if let Some(p) = prev_key
6862                    && order_key_cmp(p, okey) != core::cmp::Ordering::Equal
6863                {
6864                    current_rank = i64::try_from(i + 1).unwrap_or(i64::MAX);
6865                }
6866                if prev_key.is_none() {
6867                    current_rank = 1;
6868                }
6869                #[allow(clippy::cast_precision_loss)]
6870                let pr = if n <= 1 {
6871                    0.0
6872                } else {
6873                    (current_rank - 1) as f64 / (n - 1) as f64
6874                };
6875                out_vals[*idx] = Value::Float(pr);
6876                prev_key = Some(okey.as_slice());
6877            }
6878            Ok(())
6879        }
6880        "cume_dist" => {
6881            // # rows up to and including this row's peer group / n.
6882            let n = slice.len();
6883            // First pass: find peer-group-end rank for each row.
6884            for i in 0..slice.len() {
6885                let peer_end = peer_group_end(slice, i);
6886                #[allow(clippy::cast_precision_loss)]
6887                let cd = (peer_end + 1) as f64 / n as f64;
6888                let (_, _, idx) = &slice[i];
6889                out_vals[*idx] = Value::Float(cd);
6890            }
6891            Ok(())
6892        }
6893        other => Err(EngineError::Unsupported(alloc::format!(
6894            "window function {other:?} not supported (v4.21: row_number/rank/dense_rank/sum/avg/count/min/max/lag/lead/first_value/last_value/nth_value/ntile/percent_rank/cume_dist)"
6895        ))),
6896    }
6897}
6898
6899/// v4.20: resolve the user-provided frame down to a normalised
6900/// `(kind, start, end)`. `None` means default — derive from
6901/// `ordered`: ordered ⇒ RANGE UNBOUNDED PRECEDING AND CURRENT ROW,
6902/// unordered ⇒ ROWS UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING.
6903/// Single-bound shorthand (e.g. `ROWS 5 PRECEDING`) normalises
6904/// end → CURRENT ROW per the PG spec.
6905fn effective_frame(
6906    frame: Option<&WindowFrame>,
6907    ordered: bool,
6908) -> Result<(FrameKind, FrameBound, FrameBound), EngineError> {
6909    match frame {
6910        None => {
6911            if ordered {
6912                Ok((
6913                    FrameKind::Range,
6914                    FrameBound::UnboundedPreceding,
6915                    FrameBound::CurrentRow,
6916                ))
6917            } else {
6918                Ok((
6919                    FrameKind::Rows,
6920                    FrameBound::UnboundedPreceding,
6921                    FrameBound::UnboundedFollowing,
6922                ))
6923            }
6924        }
6925        Some(fr) => {
6926            let end = fr.end.clone().unwrap_or(FrameBound::CurrentRow);
6927            // Reject start > end (a few impossible combinations).
6928            if matches!(fr.start, FrameBound::UnboundedFollowing)
6929                || matches!(end, FrameBound::UnboundedPreceding)
6930            {
6931                return Err(EngineError::Unsupported(alloc::format!(
6932                    "invalid frame: start={:?} end={:?}",
6933                    fr.start,
6934                    end
6935                )));
6936            }
6937            // RANGE OFFSET PRECEDING / FOLLOWING needs value-typed
6938            // arithmetic on the ORDER BY key (e.g. `RANGE BETWEEN
6939            // INTERVAL '1 day' PRECEDING AND CURRENT ROW`). Not
6940            // implemented in v4.20.
6941            if fr.kind == FrameKind::Range
6942                && (matches!(
6943                    fr.start,
6944                    FrameBound::OffsetPreceding(_) | FrameBound::OffsetFollowing(_)
6945                ) || matches!(
6946                    end,
6947                    FrameBound::OffsetPreceding(_) | FrameBound::OffsetFollowing(_)
6948                ))
6949            {
6950                return Err(EngineError::Unsupported(
6951                    "RANGE with explicit offset bounds is not supported (v4.20: only UNBOUNDED / CURRENT ROW for RANGE)".into(),
6952                ));
6953            }
6954            Ok((fr.kind, fr.start.clone(), end))
6955        }
6956    }
6957}
6958
6959/// Compute `(lo, hi)` row-index bounds inside the partition slice
6960/// for the row at position `i`. Inclusive, clamped to
6961/// `[0, slice.len()-1]`. Empty result if `lo > hi`.
6962#[allow(clippy::type_complexity)]
6963fn frame_bounds_for_row(
6964    eff: &(FrameKind, FrameBound, FrameBound),
6965    i: usize,
6966    slice: &[(Vec<Value>, Vec<(Value, bool)>, usize)],
6967) -> (usize, usize) {
6968    let (kind, start, end) = eff;
6969    let n = slice.len();
6970    let last = n.saturating_sub(1);
6971    let (mut lo, mut hi) = match kind {
6972        FrameKind::Rows => {
6973            let lo = match start {
6974                FrameBound::UnboundedPreceding => 0,
6975                FrameBound::OffsetPreceding(k) => {
6976                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
6977                    i.saturating_sub(k)
6978                }
6979                FrameBound::CurrentRow => i,
6980                FrameBound::OffsetFollowing(k) => {
6981                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
6982                    i.saturating_add(k).min(last)
6983                }
6984                FrameBound::UnboundedFollowing => last,
6985            };
6986            let hi = match end {
6987                FrameBound::UnboundedPreceding => 0,
6988                FrameBound::OffsetPreceding(k) => {
6989                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
6990                    i.saturating_sub(k)
6991                }
6992                FrameBound::CurrentRow => i,
6993                FrameBound::OffsetFollowing(k) => {
6994                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
6995                    i.saturating_add(k).min(last)
6996                }
6997                FrameBound::UnboundedFollowing => last,
6998            };
6999            (lo, hi)
7000        }
7001        FrameKind::Range => {
7002            // RANGE bounds are peer-aware. With only UNBOUNDED and
7003            // CURRENT ROW supported (rejected at effective_frame for
7004            // explicit offsets), the start/end map to the
7005            // partition's full extent at the same-order-key peer
7006            // group boundary.
7007            let lo = match start {
7008                FrameBound::UnboundedPreceding => 0,
7009                FrameBound::CurrentRow => peer_group_start(slice, i),
7010                FrameBound::UnboundedFollowing => last,
7011                _ => unreachable!("offset bounds rejected for RANGE"),
7012            };
7013            let hi = match end {
7014                FrameBound::UnboundedPreceding => 0,
7015                FrameBound::CurrentRow => peer_group_end(slice, i),
7016                FrameBound::UnboundedFollowing => last,
7017                _ => unreachable!("offset bounds rejected for RANGE"),
7018            };
7019            (lo, hi)
7020        }
7021    };
7022    if hi >= n {
7023        hi = last;
7024    }
7025    if lo >= n {
7026        lo = last;
7027    }
7028    (lo, hi)
7029}
7030
7031/// Find the inclusive index of the first row with the same ORDER
7032/// BY key as `slice[i]`. Slice is already sorted by partition then
7033/// order, so peers are contiguous.
7034#[allow(clippy::type_complexity)]
7035fn peer_group_start(slice: &[(Vec<Value>, Vec<(Value, bool)>, usize)], i: usize) -> usize {
7036    let key = &slice[i].1;
7037    let mut j = i;
7038    while j > 0 && order_key_cmp(&slice[j - 1].1, key) == core::cmp::Ordering::Equal {
7039        j -= 1;
7040    }
7041    j
7042}
7043
7044/// Find the inclusive index of the last row with the same ORDER
7045/// BY key as `slice[i]`.
7046#[allow(clippy::type_complexity)]
7047fn peer_group_end(slice: &[(Vec<Value>, Vec<(Value, bool)>, usize)], i: usize) -> usize {
7048    let key = &slice[i].1;
7049    let mut j = i;
7050    while j + 1 < slice.len() && order_key_cmp(&slice[j + 1].1, key) == core::cmp::Ordering::Equal {
7051        j += 1;
7052    }
7053    j
7054}
7055
7056fn value_to_f64(v: &Value) -> Option<f64> {
7057    match v {
7058        Value::SmallInt(n) => Some(f64::from(*n)),
7059        Value::Int(n) => Some(f64::from(*n)),
7060        #[allow(clippy::cast_precision_loss)]
7061        Value::BigInt(n) => Some(*n as f64),
7062        Value::Float(x) => Some(*x),
7063        _ => None,
7064    }
7065}
7066
7067/// Quick scan for any subquery-bearing node in a SELECT's WHERE /
7068/// projection / `order_by` — saves cloning the AST when there are
7069/// none (the common case).
7070fn expr_tree_has_subquery(stmt: &SelectStatement) -> bool {
7071    let mut any = false;
7072    for item in &stmt.items {
7073        if let SelectItem::Expr { expr, .. } = item {
7074            any = any || expr_has_subquery(expr);
7075        }
7076    }
7077    if let Some(w) = &stmt.where_ {
7078        any = any || expr_has_subquery(w);
7079    }
7080    if let Some(h) = &stmt.having {
7081        any = any || expr_has_subquery(h);
7082    }
7083    for o in &stmt.order_by {
7084        any = any || expr_has_subquery(&o.expr);
7085    }
7086    for (_, peer) in &stmt.unions {
7087        any = any || expr_tree_has_subquery(peer);
7088    }
7089    any
7090}
7091
7092fn expr_has_subquery(e: &Expr) -> bool {
7093    match e {
7094        Expr::ScalarSubquery(_) | Expr::Exists { .. } | Expr::InSubquery { .. } => true,
7095        Expr::Binary { lhs, rhs, .. } => expr_has_subquery(lhs) || expr_has_subquery(rhs),
7096        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
7097            expr_has_subquery(expr)
7098        }
7099        Expr::FunctionCall { args, .. } => args.iter().any(expr_has_subquery),
7100        Expr::Like { expr, pattern, .. } => expr_has_subquery(expr) || expr_has_subquery(pattern),
7101        Expr::Extract { source, .. } => expr_has_subquery(source),
7102        Expr::WindowFunction {
7103            args,
7104            partition_by,
7105            order_by,
7106            ..
7107        } => {
7108            args.iter().any(expr_has_subquery)
7109                || partition_by.iter().any(expr_has_subquery)
7110                || order_by.iter().any(|(e, _)| expr_has_subquery(e))
7111        }
7112        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => false,
7113        Expr::Array(items) => items.iter().any(expr_has_subquery),
7114        Expr::ArraySubscript { target, index } => {
7115            expr_has_subquery(target) || expr_has_subquery(index)
7116        }
7117        Expr::AnyAll { expr, array, .. } => expr_has_subquery(expr) || expr_has_subquery(array),
7118    }
7119}
7120
7121/// v4.10 helper: materialise a runtime `Value` back into an AST
7122/// `Expr::Literal` for the subquery-rewrite path. Supports the
7123/// types `Literal` can represent (Integer / Float / Text / Bool /
7124/// Null). Date / Timestamp / Numeric / Vector / Interval / JSON
7125/// would lose precision through Literal and aren't supported in
7126/// uncorrelated-subquery results; they error with a clear hint.
7127fn value_to_literal_expr(v: Value) -> Result<Expr, EngineError> {
7128    let lit = match v {
7129        Value::Null => Literal::Null,
7130        Value::SmallInt(n) => Literal::Integer(i64::from(n)),
7131        Value::Int(n) => Literal::Integer(i64::from(n)),
7132        Value::BigInt(n) => Literal::Integer(n),
7133        Value::Float(x) => Literal::Float(x),
7134        Value::Text(s) | Value::Json(s) => Literal::String(s),
7135        Value::Bool(b) => Literal::Bool(b),
7136        other => {
7137            return Err(EngineError::Unsupported(alloc::format!(
7138                "subquery result type {:?} not yet materialisable; cast to text or integer in the inner SELECT",
7139                other.data_type()
7140            )));
7141        }
7142    };
7143    Ok(Expr::Literal(lit))
7144}
7145
7146/// v6.1.1 — walk the prepared `Statement` AST and replace every
7147/// `Expr::Placeholder(n)` with `Expr::Literal(value_to_literal(
7148/// params[n-1]))`. The dispatch downstream sees a `Statement`
7149/// indistinguishable from a simple-query parse, so the exec path
7150/// stays unchanged.
7151///
7152/// Errors fall into one shape: a `$N` references past the bound
7153/// `params.len()`. Out-of-range happens when the Bind didn't
7154/// supply enough values; pgwire surfaces this as a protocol error
7155/// to the client.
7156fn substitute_placeholders(stmt: &mut Statement, params: &[Value]) -> Result<(), EngineError> {
7157    match stmt {
7158        Statement::Select(s) => substitute_select(s, params)?,
7159        Statement::Insert(ins) => {
7160            for row in &mut ins.rows {
7161                for e in row {
7162                    substitute_expr(e, params)?;
7163                }
7164            }
7165        }
7166        Statement::Update(u) => {
7167            for (_, e) in &mut u.assignments {
7168                substitute_expr(e, params)?;
7169            }
7170            if let Some(w) = &mut u.where_ {
7171                substitute_expr(w, params)?;
7172            }
7173        }
7174        Statement::Delete(d) => {
7175            if let Some(w) = &mut d.where_ {
7176                substitute_expr(w, params)?;
7177            }
7178        }
7179        Statement::Explain(e) => substitute_select(&mut e.inner, params)?,
7180        // Other statements (CREATE / BEGIN / SHOW / …) have no
7181        // expression slots; no walk needed.
7182        _ => {}
7183    }
7184    Ok(())
7185}
7186
7187fn substitute_select(s: &mut SelectStatement, params: &[Value]) -> Result<(), EngineError> {
7188    for item in &mut s.items {
7189        if let SelectItem::Expr { expr, .. } = item {
7190            substitute_expr(expr, params)?;
7191        }
7192    }
7193    if let Some(w) = &mut s.where_ {
7194        substitute_expr(w, params)?;
7195    }
7196    if let Some(gs) = &mut s.group_by {
7197        for g in gs {
7198            substitute_expr(g, params)?;
7199        }
7200    }
7201    if let Some(h) = &mut s.having {
7202        substitute_expr(h, params)?;
7203    }
7204    for o in &mut s.order_by {
7205        substitute_expr(&mut o.expr, params)?;
7206    }
7207    for (_, peer) in &mut s.unions {
7208        substitute_select(peer, params)?;
7209    }
7210    // v7.9.24 — LIMIT $N / OFFSET $N placeholder resolution.
7211    // mailrs H2. After this pass each LIMIT/OFFSET that was a
7212    // Placeholder is rewritten to Literal so the existing
7213    // `LimitExpr::as_literal` path consumes a concrete u32.
7214    if let Some(le) = s.limit {
7215        s.limit = Some(resolve_limit_placeholder(le, params)?);
7216    }
7217    if let Some(le) = s.offset {
7218        s.offset = Some(resolve_limit_placeholder(le, params)?);
7219    }
7220    Ok(())
7221}
7222
7223fn resolve_limit_placeholder(
7224    le: spg_sql::ast::LimitExpr,
7225    params: &[Value],
7226) -> Result<spg_sql::ast::LimitExpr, EngineError> {
7227    use spg_sql::ast::LimitExpr;
7228    match le {
7229        LimitExpr::Literal(_) => Ok(le),
7230        LimitExpr::Placeholder(n) => {
7231            let idx = usize::from(n).saturating_sub(1);
7232            let v = params.get(idx).ok_or_else(|| {
7233                EngineError::Eval(EvalError::PlaceholderOutOfRange {
7234                    n,
7235                    bound: u16::try_from(params.len()).unwrap_or(u16::MAX),
7236                })
7237            })?;
7238            let int = match v {
7239                Value::SmallInt(x) => Some(i64::from(*x)),
7240                Value::Int(x) => Some(i64::from(*x)),
7241                Value::BigInt(x) => Some(*x),
7242                _ => None,
7243            }
7244            .ok_or_else(|| {
7245                EngineError::Unsupported(alloc::format!(
7246                    "LIMIT/OFFSET ${n} bound to non-integer {v:?}"
7247                ))
7248            })?;
7249            if int < 0 {
7250                return Err(EngineError::Unsupported(alloc::format!(
7251                    "LIMIT/OFFSET ${n} bound to negative value {int}"
7252                )));
7253            }
7254            let bounded = u32::try_from(int).map_err(|_| {
7255                EngineError::Unsupported(alloc::format!(
7256                    "LIMIT/OFFSET ${n} value {int} exceeds u32 range"
7257                ))
7258            })?;
7259            Ok(LimitExpr::Literal(bounded))
7260        }
7261    }
7262}
7263
7264fn substitute_expr(e: &mut Expr, params: &[Value]) -> Result<(), EngineError> {
7265    if let Expr::Placeholder(n) = e {
7266        let idx = usize::from(*n).saturating_sub(1);
7267        let v = params.get(idx).ok_or_else(|| {
7268            EngineError::Eval(EvalError::PlaceholderOutOfRange {
7269                n: *n,
7270                bound: u16::try_from(params.len()).unwrap_or(u16::MAX),
7271            })
7272        })?;
7273        *e = Expr::Literal(value_to_literal(v.clone()));
7274        return Ok(());
7275    }
7276    match e {
7277        Expr::Binary { lhs, rhs, .. } => {
7278            substitute_expr(lhs, params)?;
7279            substitute_expr(rhs, params)?;
7280        }
7281        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
7282            substitute_expr(expr, params)?;
7283        }
7284        Expr::FunctionCall { args, .. } => {
7285            for a in args {
7286                substitute_expr(a, params)?;
7287            }
7288        }
7289        Expr::Like { expr, pattern, .. } => {
7290            substitute_expr(expr, params)?;
7291            substitute_expr(pattern, params)?;
7292        }
7293        Expr::Extract { source, .. } => substitute_expr(source, params)?,
7294        Expr::ScalarSubquery(s) => substitute_select(s, params)?,
7295        Expr::Exists { subquery, .. } => substitute_select(subquery, params)?,
7296        Expr::InSubquery { expr, subquery, .. } => {
7297            substitute_expr(expr, params)?;
7298            substitute_select(subquery, params)?;
7299        }
7300        Expr::WindowFunction {
7301            args,
7302            partition_by,
7303            order_by,
7304            ..
7305        } => {
7306            for a in args {
7307                substitute_expr(a, params)?;
7308            }
7309            for p in partition_by {
7310                substitute_expr(p, params)?;
7311            }
7312            for (e, _) in order_by {
7313                substitute_expr(e, params)?;
7314            }
7315        }
7316        Expr::Literal(_) | Expr::Column(_) => {}
7317        // Already handled above.
7318        Expr::Placeholder(_) => unreachable!("Placeholder handled at top of fn"),
7319        Expr::Array(items) => {
7320            for elem in items {
7321                substitute_expr(elem, params)?;
7322            }
7323        }
7324        Expr::ArraySubscript { target, index } => {
7325            substitute_expr(target, params)?;
7326            substitute_expr(index, params)?;
7327        }
7328        Expr::AnyAll { expr, array, .. } => {
7329            substitute_expr(expr, params)?;
7330            substitute_expr(array, params)?;
7331        }
7332    }
7333    Ok(())
7334}
7335
7336/// v6.1.1 — convert a runtime `Value` into the closest matching
7337/// `Literal` for the substitute walker. Lossless for the simple
7338/// scalars (Int / Float / Text / Bool); Numeric / Date / Timestamp
7339/// / Json / Interval render as their canonical text form so the
7340/// downstream coerce_value can re-parse against the target column
7341/// type. SQ8 / HalfVector cells are NOT expected as bind params;
7342/// pgwire's Bind decodes vector params to the f32 representation
7343/// before they reach this helper.
7344/// v6.2.0 — total ordering on `Value`s used by ANALYZE to sort a
7345/// column's non-NULL sample before histogram building. Cross-type
7346/// pairs (Int vs Float, Date vs Timestamp, …) compare via the
7347/// same widening the eval-side `compare` operator uses; everything
7348/// else (the genuinely-incompatible pairs) falls back to ordering
7349/// by canonical string form so the sort is still total + stable.
7350/// Vector / SQ8 / Half / Json / Numeric / Interval values reach
7351/// here only via the string-fallback path because vector columns
7352/// are filtered out upstream.
7353fn sort_values_for_histogram(a: &Value, b: &Value) -> core::cmp::Ordering {
7354    use core::cmp::Ordering;
7355    match (a, b) {
7356        (Value::SmallInt(a), Value::SmallInt(b)) => a.cmp(b),
7357        (Value::Int(a), Value::Int(b)) => a.cmp(b),
7358        (Value::BigInt(a), Value::BigInt(b)) => a.cmp(b),
7359        (Value::SmallInt(a), Value::Int(b)) => i32::from(*a).cmp(b),
7360        (Value::Int(a), Value::SmallInt(b)) => a.cmp(&i32::from(*b)),
7361        (Value::Int(a), Value::BigInt(b)) => i64::from(*a).cmp(b),
7362        (Value::BigInt(a), Value::Int(b)) => a.cmp(&i64::from(*b)),
7363        (Value::SmallInt(a), Value::BigInt(b)) => i64::from(*a).cmp(b),
7364        (Value::BigInt(a), Value::SmallInt(b)) => a.cmp(&i64::from(*b)),
7365        (Value::Float(a), Value::Float(b)) => a.partial_cmp(b).unwrap_or(Ordering::Equal),
7366        (Value::Text(a), Value::Text(b)) | (Value::Json(a), Value::Json(b)) => a.cmp(b),
7367        (Value::Bool(a), Value::Bool(b)) => a.cmp(b),
7368        (Value::Date(a), Value::Date(b)) => a.cmp(b),
7369        (Value::Timestamp(a), Value::Timestamp(b)) => a.cmp(b),
7370        // Mixed numeric/float — widen to f64 and compare.
7371        (Value::SmallInt(n), Value::Float(x)) => {
7372            (f64::from(*n)).partial_cmp(x).unwrap_or(Ordering::Equal)
7373        }
7374        (Value::Float(x), Value::SmallInt(n)) => {
7375            x.partial_cmp(&f64::from(*n)).unwrap_or(Ordering::Equal)
7376        }
7377        (Value::Int(n), Value::Float(x)) => {
7378            (f64::from(*n)).partial_cmp(x).unwrap_or(Ordering::Equal)
7379        }
7380        (Value::Float(x), Value::Int(n)) => {
7381            x.partial_cmp(&f64::from(*n)).unwrap_or(Ordering::Equal)
7382        }
7383        (Value::BigInt(n), Value::Float(x)) => {
7384            #[allow(clippy::cast_precision_loss)]
7385            let nf = *n as f64;
7386            nf.partial_cmp(x).unwrap_or(Ordering::Equal)
7387        }
7388        (Value::Float(x), Value::BigInt(n)) => {
7389            #[allow(clippy::cast_precision_loss)]
7390            let nf = *n as f64;
7391            x.partial_cmp(&nf).unwrap_or(Ordering::Equal)
7392        }
7393        // Cross-type fallback: lexicographic on canonical form.
7394        // Total + stable so the sort is well-defined.
7395        _ => canonical_value_repr(a).cmp(&canonical_value_repr(b)),
7396    }
7397}
7398
/// v6.2.0 — render histogram bounds as a `[v0, v1, ...]` string for
/// the `spg_statistic.histogram_bounds` column.
///
/// Entries are separated by `", "`. An entry is wrapped in double
/// quotes when it is empty or contains any of `,`, `[`, `]`, `"`;
/// inside a quoted entry every `"` and `\` is preceded by a
/// backslash. All other entries are emitted verbatim (including any
/// backslashes they contain).
fn render_histogram_bounds(bounds: &[alloc::string::String]) -> alloc::string::String {
    let mut rendered = alloc::string::String::with_capacity(bounds.len() * 8 + 2);
    rendered.push('[');
    let mut first = true;
    for bound in bounds {
        if !first {
            rendered.push_str(", ");
        }
        first = false;

        let plain = !bound.is_empty() && !bound.contains([',', '[', ']', '"']);
        if plain {
            rendered.push_str(bound);
            continue;
        }

        rendered.push('"');
        for ch in bound.chars() {
            if matches!(ch, '"' | '\\') {
                rendered.push('\\');
            }
            rendered.push(ch);
        }
        rendered.push('"');
    }
    rendered.push(']');
    rendered
}
7429
7430/// v6.2.0 — canonical textual form of a `Value` for histogram
7431/// bound storage. Strings used by ANALYZE for sort + bound output.
7432/// INT / BIGINT → decimal; FLOAT → shortest-round-trip via
7433/// `{:?}`; TEXT pass-through; BOOL → `t` / `f`; DATE / TIMESTAMP →
7434/// the same form `format_date` / `format_timestamp` produce for
7435/// SQL Display. Vector / SQ8 / Half / Json / Numeric / Interval
7436/// reach this only via a non-Vector column (vector columns are
7437/// skipped upstream); they fall back to a Debug-derived form so
7438/// stats still serialise without crashing.
7439pub(crate) fn canonical_value_repr(v: &Value) -> alloc::string::String {
7440    match v {
7441        Value::Null => "NULL".to_string(),
7442        Value::SmallInt(n) => alloc::format!("{n}"),
7443        Value::Int(n) => alloc::format!("{n}"),
7444        Value::BigInt(n) => alloc::format!("{n}"),
7445        Value::Float(x) => alloc::format!("{x:?}"),
7446        Value::Text(s) | Value::Json(s) => s.clone(),
7447        Value::Bool(b) => if *b { "t" } else { "f" }.to_string(),
7448        Value::Date(d) => eval::format_date(*d),
7449        Value::Timestamp(t) => eval::format_timestamp(*t),
7450        Value::Interval { months, micros } => eval::format_interval(*months, *micros),
7451        Value::Numeric { scaled, scale } => eval::format_numeric(*scaled, *scale),
7452        Value::Vector(_) | Value::Sq8Vector(_) | Value::HalfVector(_) => {
7453            // Unreachable in practice (vector columns are filtered
7454            // out before this). Defensive fallback so a future
7455            // vector-stats path doesn't crash.
7456            alloc::format!("{v:?}")
7457        }
7458        // v7.5.0 — Value is #[non_exhaustive] for downstream
7459        // forward-compat. Future variants fall through to Debug
7460        // form here (same shape as the vector fallback above).
7461        _ => alloc::format!("{v:?}"),
7462    }
7463}
7464
/// v6.2.0 — reports whether `_name` is an engine-managed catalog
/// table that a bare `ANALYZE` (no target) should skip.
///
/// Always `false` today: publications, subscriptions, users and
/// statistics are held as engine fields rather than catalog tables,
/// so every table is analysed. The function exists as a hook for when
/// internal tables appear; the argument is currently ignored.
const fn is_internal_table_name(_name: &str) -> bool {
    false
}
7474
7475fn value_to_literal(v: Value) -> Literal {
7476    match v {
7477        Value::Null => Literal::Null,
7478        Value::SmallInt(n) => Literal::Integer(i64::from(n)),
7479        Value::Int(n) => Literal::Integer(i64::from(n)),
7480        Value::BigInt(n) => Literal::Integer(n),
7481        Value::Float(x) => Literal::Float(x),
7482        Value::Text(s) | Value::Json(s) => Literal::String(s),
7483        Value::Bool(b) => Literal::Bool(b),
7484        Value::Vector(v) => Literal::Vector(v),
7485        Value::Numeric { scaled, scale } => Literal::String(eval::format_numeric(scaled, scale)),
7486        Value::Date(d) => Literal::String(eval::format_date(d)),
7487        Value::Timestamp(t) => Literal::String(eval::format_timestamp(t)),
7488        Value::Interval { months, micros } => Literal::Interval {
7489            months,
7490            micros,
7491            text: eval::format_interval(months, micros),
7492        },
7493        // SQ8 / halfvec cells dequantise to f32 before reaching the
7494        // substitute walker; pgwire's Bind path handles that.
7495        Value::Sq8Vector(q) => Literal::Vector(spg_storage::quantize::dequantize(&q)),
7496        Value::HalfVector(h) => Literal::Vector(h.to_f32_vec()),
7497        // v7.5.0 — Value is #[non_exhaustive]; future variants
7498        // render as Debug-form String literal until explicit
7499        // mapping is added.
7500        v => Literal::String(alloc::format!("{v:?}")),
7501    }
7502}
7503
7504fn rewrite_clock_calls(stmt: &mut Statement, now_micros: Option<i64>) {
7505    let Some(now) = now_micros else {
7506        return;
7507    };
7508    match stmt {
7509        Statement::Select(s) => rewrite_select_clock(s, now),
7510        Statement::Insert(ins) => {
7511            for row in &mut ins.rows {
7512                for e in row {
7513                    rewrite_expr_clock(e, now);
7514                }
7515            }
7516        }
7517        _ => {}
7518    }
7519}
7520
7521fn rewrite_select_clock(s: &mut SelectStatement, now: i64) {
7522    for item in &mut s.items {
7523        if let SelectItem::Expr { expr, .. } = item {
7524            rewrite_expr_clock(expr, now);
7525        }
7526    }
7527    if let Some(w) = &mut s.where_ {
7528        rewrite_expr_clock(w, now);
7529    }
7530    if let Some(gs) = &mut s.group_by {
7531        for g in gs {
7532            rewrite_expr_clock(g, now);
7533        }
7534    }
7535    if let Some(h) = &mut s.having {
7536        rewrite_expr_clock(h, now);
7537    }
7538    for o in &mut s.order_by {
7539        rewrite_expr_clock(&mut o.expr, now);
7540    }
7541    for (_, peer) in &mut s.unions {
7542        rewrite_select_clock(peer, now);
7543    }
7544}
7545
7546/// v3.0.3 hot path: every recursion lands in exactly one `match` arm.
7547/// Literal / Column-with-qualifier (the dominant cases on a typical
7548/// AST) take a single pattern dispatch and exit. The clock-rewrite
7549/// targets (zero-arg `NOW` / `CURRENT_TIMESTAMP` / `CURRENT_DATE`
7550/// functions, and bare `CURRENT_TIMESTAMP` / `CURRENT_DATE` column
7551/// refs) sit on their own arms with match guards so the fall-through
7552/// to the recursive arms is unambiguous.
7553fn rewrite_expr_clock(e: &mut Expr, now: i64) {
7554    // Fast-path test on the no-recursion shapes first. We can't fold
7555    // them into the big match below because they need to *replace* `e`
7556    // outright; the recursive arms below match on its sub-fields.
7557    if let Some(replacement) = clock_replacement_for(e, now) {
7558        *e = replacement;
7559        return;
7560    }
7561    match e {
7562        Expr::Binary { lhs, rhs, .. } => {
7563            rewrite_expr_clock(lhs, now);
7564            rewrite_expr_clock(rhs, now);
7565        }
7566        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
7567            rewrite_expr_clock(expr, now);
7568        }
7569        Expr::FunctionCall { args, .. } => {
7570            for a in args {
7571                rewrite_expr_clock(a, now);
7572            }
7573        }
7574        Expr::Like { expr, pattern, .. } => {
7575            rewrite_expr_clock(expr, now);
7576            rewrite_expr_clock(pattern, now);
7577        }
7578        Expr::Extract { source, .. } => rewrite_expr_clock(source, now),
7579        // v4.10 subquery nodes — recurse into the inner SELECT's
7580        // expression slots so e.g. SELECT NOW() in a scalar
7581        // subquery picks up the same instant as the outer query.
7582        Expr::ScalarSubquery(s) => rewrite_select_clock(s, now),
7583        Expr::Exists { subquery, .. } => rewrite_select_clock(subquery, now),
7584        Expr::InSubquery { expr, subquery, .. } => {
7585            rewrite_expr_clock(expr, now);
7586            rewrite_select_clock(subquery, now);
7587        }
7588        // v4.12 window functions — args + PARTITION BY + ORDER BY
7589        // may all reference clock literals.
7590        Expr::WindowFunction {
7591            args,
7592            partition_by,
7593            order_by,
7594            ..
7595        } => {
7596            for a in args {
7597                rewrite_expr_clock(a, now);
7598            }
7599            for p in partition_by {
7600                rewrite_expr_clock(p, now);
7601            }
7602            for (e, _) in order_by {
7603                rewrite_expr_clock(e, now);
7604            }
7605        }
7606        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => {}
7607        Expr::Array(items) => {
7608            for elem in items {
7609                rewrite_expr_clock(elem, now);
7610            }
7611        }
7612        Expr::ArraySubscript { target, index } => {
7613            rewrite_expr_clock(target, now);
7614            rewrite_expr_clock(index, now);
7615        }
7616        Expr::AnyAll { expr, array, .. } => {
7617            rewrite_expr_clock(expr, now);
7618            rewrite_expr_clock(array, now);
7619        }
7620    }
7621}
7622
7623/// Returns `Some(Expr)` when `e` is one of the clock-call shapes that
7624/// must be rewritten; otherwise `None` so the caller falls through to
7625/// the recursive walk. Identifies both function-call forms (`NOW()` /
7626/// `CURRENT_TIMESTAMP()` / `CURRENT_DATE()`) and bare-identifier forms
7627/// (`CURRENT_TIMESTAMP` / `CURRENT_DATE` as unqualified column refs,
7628/// which is how PG accepts them without parens).
7629fn clock_replacement_for(e: &Expr, now: i64) -> Option<Expr> {
7630    let (kind, name) = match e {
7631        Expr::FunctionCall { name, args } if args.is_empty() => (ClockSite::Fn, name.as_str()),
7632        Expr::Column(c) if c.qualifier.is_none() => (ClockSite::BareIdent, c.name.as_str()),
7633        _ => return None,
7634    };
7635    // ASCII case-insensitive name match. Limited to the three keywords
7636    // that actually need rewriting.
7637    let matched = match name.len() {
7638        3 if kind == ClockSite::Fn && name.eq_ignore_ascii_case("now") => Some(true),
7639        12 if name.eq_ignore_ascii_case("current_date") => Some(false),
7640        17 if name.eq_ignore_ascii_case("current_timestamp") => Some(true),
7641        _ => None,
7642    };
7643    let is_timestamp = matched?;
7644    let payload = if is_timestamp {
7645        now
7646    } else {
7647        now.div_euclid(86_400_000_000)
7648    };
7649    let target = if is_timestamp {
7650        spg_sql::ast::CastTarget::Timestamp
7651    } else {
7652        spg_sql::ast::CastTarget::Date
7653    };
7654    Some(Expr::Cast {
7655        expr: alloc::boxed::Box::new(Expr::Literal(spg_sql::ast::Literal::Integer(payload))),
7656        target,
7657    })
7658}
7659
/// Syntactic shape in which `clock_replacement_for` found a candidate
/// clock keyword.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ClockSite {
    /// A function call with no arguments, e.g. `NOW()`.
    Fn,
    /// An unqualified column reference, e.g. a bare `CURRENT_DATE`.
    BareIdent,
}
7665
7666/// `ORDER BY <integer>` references the N-th SELECT item (1-based).
7667/// Swap the integer literal for the matching item's expression so the
7668/// executor doesn't need a special-case branch. Recurses into UNION
7669/// peers because each peer keeps its own SELECT list.
7670/// v6.4.1 — expand `GROUP BY ALL` to every non-aggregate SELECT-list
7671/// item. Mirrors DuckDB / PG 19 semantics. Wildcards (`SELECT * …`)
7672/// are NOT expanded by GROUP BY ALL (PG 19 leaves the wildcard intact
7673/// and groups by whatever explicit non-aggregates remain — none in
7674/// the wildcard-only case, which still works for non-aggregate
7675/// queries).
7676fn expand_group_by_all(s: &mut SelectStatement) {
7677    if !s.group_by_all {
7678        for (_, peer) in &mut s.unions {
7679            expand_group_by_all(peer);
7680        }
7681        return;
7682    }
7683    let mut groups: Vec<Expr> = Vec::new();
7684    for item in &s.items {
7685        if let SelectItem::Expr { expr, .. } = item
7686            && !aggregate::contains_aggregate(expr)
7687        {
7688            groups.push(expr.clone());
7689        }
7690    }
7691    s.group_by = Some(groups);
7692    s.group_by_all = false;
7693    for (_, peer) in &mut s.unions {
7694        expand_group_by_all(peer);
7695    }
7696}
7697
7698fn resolve_order_by_position(s: &mut SelectStatement) {
7699    // v6.4.0 — iterate every ORDER BY key. Position references
7700    // (`ORDER BY 2`) bind to the 1-based projection index;
7701    // identifier references that match a SELECT-list alias bind to
7702    // the projected expression (Step 4 of L3a).
7703    for order in &mut s.order_by {
7704        match &order.expr {
7705            Expr::Literal(Literal::Integer(n)) if *n >= 1 => {
7706                if let Ok(idx_one_based) = usize::try_from(*n) {
7707                    let idx = idx_one_based - 1;
7708                    if idx < s.items.len()
7709                        && let SelectItem::Expr { expr, .. } = &s.items[idx]
7710                    {
7711                        order.expr = expr.clone();
7712                    }
7713                }
7714            }
7715            Expr::Column(c) if c.qualifier.is_none() => {
7716                // Alias-in-ORDER-BY lookup.
7717                for item in &s.items {
7718                    if let SelectItem::Expr {
7719                        expr,
7720                        alias: Some(a),
7721                    } = item
7722                        && a == &c.name
7723                    {
7724                        order.expr = expr.clone();
7725                        break;
7726                    }
7727                }
7728            }
7729            _ => {}
7730        }
7731    }
7732    for (_, peer) in &mut s.unions {
7733        resolve_order_by_position(peer);
7734    }
7735}
7736
7737/// Sort `tagged` by `f64` key, reversing the comparator under DESC.
7738/// Used by the UNION ORDER BY path; per-block paths inline the same
7739/// comparator because they already hold `&OrderBy` directly.
7740/// v3.1.1: partial-sort helper. When `keep` (= offset + limit) is
7741/// strictly less than `tagged.len()`, run `select_nth_unstable_by` to
7742/// partition the prefix in O(n), then sort just that prefix in O(k
7743/// log k). Total O(n + k log k), vs O(n log n) for a full sort. The
7744/// caller decides what `keep` is; passing `None` (no LIMIT) keeps the
7745/// full-sort behaviour.
7746///
7747/// `tagged` holds `(Option<f64>, Row)` (the SELECT path) — `None` keys
7748/// sort last in ascending order, mirroring NULL-sorts-last in SQL.
7749fn partial_sort_tagged(tagged: &mut Vec<(Vec<f64>, Row)>, keep: Option<usize>, descs: &[bool]) {
7750    let cmp = |a: &(Vec<f64>, Row), b: &(Vec<f64>, Row)| cmp_multi_key(&a.0, &b.0, descs);
7751    match keep {
7752        Some(k) if k < tagged.len() && k > 0 => {
7753            let pivot = k - 1;
7754            tagged.select_nth_unstable_by(pivot, cmp);
7755            tagged[..k].sort_by(cmp);
7756            tagged.truncate(k);
7757        }
7758        _ => {
7759            tagged.sort_by(cmp);
7760        }
7761    }
7762}
7763
7764fn sort_by_keys(tagged: &mut [(Vec<f64>, Row)], descs: &[bool]) {
7765    tagged.sort_by(|a, b| cmp_multi_key(&a.0, &b.0, descs));
7766}
7767
/// v6.4.0 — multi-key ORDER BY comparator.
///
/// Keys are compared pairwise, left to right; the first pair that is
/// not `Equal` decides. Key `i` is compared in reverse when
/// `descs[i]` is `true`; a missing `descs` entry means ascending. If
/// one key list is shorter, only the common prefix is compared.
///
/// NULL is encoded by callers as `f64::INFINITY`, so it sorts last in
/// ASC and first in DESC (matches PG default).
///
/// NaN keys sort above every non-NaN key (including the NULL
/// encoding) and equal to each other. Treating NaN as `Equal` to
/// everything would make the comparison non-transitive
/// (`1 == NaN == 2` yet `1 < 2`), which `sort_by` and
/// `select_nth_unstable_by` do not tolerate: the resulting order is
/// unspecified and the sort may panic.
fn cmp_multi_key(a: &[f64], b: &[f64], descs: &[bool]) -> core::cmp::Ordering {
    use core::cmp::Ordering;
    for (i, (ka, kb)) in a.iter().zip(b.iter()).enumerate() {
        // `partial_cmp` is `None` only when a NaN is involved; rank by
        // "is NaN" in that case so the order stays total.
        let ord = ka
            .partial_cmp(kb)
            .unwrap_or_else(|| ka.is_nan().cmp(&kb.is_nan()));
        let ord = if descs.get(i).copied().unwrap_or(false) {
            ord.reverse()
        } else {
            ord
        };
        if ord != Ordering::Equal {
            return ord;
        }
    }
    Ordering::Equal
}
7786
7787/// v6.4.0 — eval every ORDER BY expression for a row and pack the
7788/// resulting keys into a `Vec<f64>`. NULL → `f64::INFINITY`.
7789fn build_order_keys(
7790    order_by: &[OrderBy],
7791    row: &Row,
7792    ctx: &EvalContext,
7793) -> Result<Vec<f64>, EngineError> {
7794    let mut keys = Vec::with_capacity(order_by.len());
7795    for o in order_by {
7796        let v = eval::eval_expr(&o.expr, row, ctx)?;
7797        keys.push(value_to_order_key(&v)?);
7798    }
7799    Ok(keys)
7800}
7801
7802/// Drop the first `offset` rows then truncate to `limit`. PG / `MySQL`
7803/// agree: OFFSET applies *after* ORDER BY but *before* LIMIT (so
7804/// `LIMIT 10 OFFSET 5` keeps rows 6..=15).
7805fn apply_offset_and_limit(rows: &mut Vec<Row>, offset: Option<u32>, limit: Option<u32>) {
7806    if let Some(off) = offset {
7807        let off = off as usize;
7808        if off >= rows.len() {
7809            rows.clear();
7810        } else {
7811            rows.drain(..off);
7812        }
7813    }
7814    if let Some(n) = limit {
7815        rows.truncate(n as usize);
7816    }
7817}
7818
7819/// v7.6.1 — resolve a parser-level `ForeignKeyConstraint` (column
7820/// names + parent table name) into the storage-layer shape (column
7821/// indices + same parent table). Validates everything the engine
7822/// needs to know about the FK at CREATE TABLE time:
7823///
7824///   - parent table exists (catalog lookup, unless self-referencing)
7825///   - parent columns exist on the parent table
7826///   - parent column list matches the local arity (defaults to the
7827///     parent's primary index column when omitted)
7828///   - parent columns are covered by a `BTree` UNIQUE-class index
7829///     (SPG's stand-in for `PRIMARY KEY`/`UNIQUE`) — required so
7830///     the v7.6.2 INSERT path can do an O(log n) parent lookup
7831///   - local columns exist on the table being created
7832fn resolve_foreign_key(
7833    local_table_name: &str,
7834    local_cols: &[ColumnSchema],
7835    fk: spg_sql::ast::ForeignKeyConstraint,
7836    catalog: &Catalog,
7837) -> Result<spg_storage::ForeignKeyConstraint, EngineError> {
7838    // Resolve local columns.
7839    let mut local_columns = Vec::with_capacity(fk.columns.len());
7840    for name in &fk.columns {
7841        let pos = local_cols
7842            .iter()
7843            .position(|c| c.name == *name)
7844            .ok_or_else(|| {
7845                EngineError::Unsupported(alloc::format!(
7846                    "FOREIGN KEY references unknown local column {name:?}"
7847                ))
7848            })?;
7849        local_columns.push(pos);
7850    }
7851    // Self-referencing FK: parent table is the one we're creating.
7852    // The parent column resolution uses the local column list since
7853    // the catalog doesn't have this table yet.
7854    let is_self_ref = fk.parent_table == local_table_name;
7855    let (parent_cols_for_lookup, parent_table_str): (&[ColumnSchema], &str) = if is_self_ref {
7856        (local_cols, local_table_name)
7857    } else {
7858        let parent_table = catalog.get(&fk.parent_table).ok_or_else(|| {
7859            EngineError::Storage(StorageError::TableNotFound {
7860                name: fk.parent_table.clone(),
7861            })
7862        })?;
7863        (
7864            parent_table.schema().columns.as_slice(),
7865            fk.parent_table.as_str(),
7866        )
7867    };
7868    // Resolve parent column names → positions. If the FK omitted the
7869    // parent column list, fall back to the parent's primary index
7870    // column (single-column only — composite default is rejected
7871    // because there's no unambiguous "PK" in SPG's index list).
7872    let parent_columns: Vec<usize> = if fk.parent_columns.is_empty() {
7873        if fk.columns.len() != 1 {
7874            return Err(EngineError::Unsupported(
7875                "composite FOREIGN KEY without explicit parent column list is not supported \
7876                 — list the parent columns explicitly"
7877                    .into(),
7878            ));
7879        }
7880        // Find a single BTree index on the parent and use its column.
7881        let pos = pick_pk_index_column(catalog, parent_table_str, is_self_ref, local_cols)
7882            .ok_or_else(|| {
7883                EngineError::Unsupported(alloc::format!(
7884                    "parent table {parent_table_str:?} has no PRIMARY-key / UNIQUE BTree index \
7885                     to default the FOREIGN KEY against"
7886                ))
7887            })?;
7888        alloc::vec![pos]
7889    } else {
7890        let mut out = Vec::with_capacity(fk.parent_columns.len());
7891        for name in &fk.parent_columns {
7892            let pos = parent_cols_for_lookup
7893                .iter()
7894                .position(|c| c.name == *name)
7895                .ok_or_else(|| {
7896                    EngineError::Unsupported(alloc::format!(
7897                        "FOREIGN KEY references unknown parent column \
7898                         {name:?} on table {parent_table_str:?}"
7899                    ))
7900                })?;
7901            out.push(pos);
7902        }
7903        out
7904    };
7905    if parent_columns.len() != local_columns.len() {
7906        return Err(EngineError::Unsupported(alloc::format!(
7907            "FOREIGN KEY arity mismatch: {} local columns vs {} parent columns",
7908            local_columns.len(),
7909            parent_columns.len()
7910        )));
7911    }
7912    // For non-self-referencing FKs, verify the parent column set is
7913    // covered by a BTree index. SPG doesn't have a `PRIMARY KEY`
7914    // declaration; the convention is "the parent column for FK
7915    // purposes must have a BTree index" — which the user creates via
7916    // `CREATE INDEX ... USING btree (col)` (the default). We accept
7917    // any single-column BTree index that covers a parent column;
7918    // composite parent column lists require an index whose `column_position`
7919    // matches the first parent column (multi-column BTree indices
7920    // are not in the v7.x roadmap).
7921    if !is_self_ref {
7922        let parent_table = catalog.get(&fk.parent_table).expect("checked above");
7923        let primary_parent_col = parent_columns[0];
7924        let has_btree = parent_table
7925            .schema()
7926            .columns
7927            .get(primary_parent_col)
7928            .is_some()
7929            && parent_table.indices().iter().any(|idx| {
7930                matches!(idx.kind, spg_storage::IndexKind::BTree(_))
7931                    && idx.column_position == primary_parent_col
7932                    && idx.partial_predicate.is_none()
7933            });
7934        if !has_btree {
7935            return Err(EngineError::Unsupported(alloc::format!(
7936                "FOREIGN KEY parent column on {:?} is not covered by an unconditional BTree \
7937                 index — create one with `CREATE INDEX ... ON {} ({})` first",
7938                parent_table_str,
7939                parent_table_str,
7940                parent_table.schema().columns[primary_parent_col].name,
7941            )));
7942        }
7943    }
7944    let on_delete = fk_action_sql_to_storage(fk.on_delete);
7945    let on_update = fk_action_sql_to_storage(fk.on_update);
7946    Ok(spg_storage::ForeignKeyConstraint {
7947        name: fk.name,
7948        local_columns,
7949        parent_table: fk.parent_table,
7950        parent_columns,
7951        on_delete,
7952        on_update,
7953    })
7954}
7955
7956/// v7.6.1 — pick a sentinel "primary key" column from the parent
7957/// table when the FK didn't name parent columns. Picks the first
7958/// single-column unconditional BTree index — that's the closest
7959/// thing SPG has to a PRIMARY KEY today. Self-referencing FKs use
7960/// `local_cols` as the column source.
7961fn pick_pk_index_column(
7962    catalog: &Catalog,
7963    parent_name: &str,
7964    is_self_ref: bool,
7965    local_cols: &[ColumnSchema],
7966) -> Option<usize> {
7967    if is_self_ref {
7968        // Self-ref FK omitted parent columns: pick column 0 by
7969        // convention (no catalog entry yet). Engine will widen this
7970        // when v7.6.7 lands; v7.6.1 only handles the explicit form.
7971        let _ = local_cols;
7972        return Some(0);
7973    }
7974    let parent = catalog.get(parent_name)?;
7975    parent.indices().iter().find_map(|idx| {
7976        if matches!(idx.kind, spg_storage::IndexKind::BTree(_))
7977            && idx.partial_predicate.is_none()
7978            && idx.included_columns.is_empty()
7979            && idx.expression.is_none()
7980        {
7981            Some(idx.column_position)
7982        } else {
7983            None
7984        }
7985    })
7986}
7987
7988/// v7.9.8 / v7.9.10 — resolve the column positions that
7989/// identify a conflict for ON CONFLICT. Returns a Vec of
7990/// column positions (1 element for single-column form, N for
7991/// composite). When the user wrote bare `ON CONFLICT DO …`,
7992/// falls back to the table's first unconditional BTree index
7993/// (always single-column today).
7994fn resolve_on_conflict_columns(
7995    catalog: &Catalog,
7996    table_name: &str,
7997    target: &[String],
7998) -> Result<Vec<usize>, EngineError> {
7999    let table = catalog.get(table_name).ok_or_else(|| {
8000        EngineError::Storage(StorageError::TableNotFound {
8001            name: table_name.into(),
8002        })
8003    })?;
8004    if target.is_empty() {
8005        let pos = table
8006            .indices()
8007            .iter()
8008            .find_map(|idx| {
8009                if matches!(idx.kind, spg_storage::IndexKind::BTree(_))
8010                    && idx.partial_predicate.is_none()
8011                    && idx.included_columns.is_empty()
8012                    && idx.expression.is_none()
8013                {
8014                    Some(idx.column_position)
8015                } else {
8016                    None
8017                }
8018            })
8019            .ok_or_else(|| {
8020                EngineError::Unsupported(alloc::format!(
8021                    "ON CONFLICT without target requires a UNIQUE BTree index on {table_name:?}"
8022                ))
8023            })?;
8024        return Ok(alloc::vec![pos]);
8025    }
8026    let mut out = Vec::with_capacity(target.len());
8027    for name in target {
8028        let pos = table
8029            .schema()
8030            .columns
8031            .iter()
8032            .position(|c| c.name == *name)
8033            .ok_or_else(|| {
8034                EngineError::Unsupported(alloc::format!(
8035                    "ON CONFLICT target column {name:?} not found on {table_name:?}"
8036                ))
8037            })?;
8038        out.push(pos);
8039    }
8040    Ok(out)
8041}
8042
8043/// v7.9.8 — check whether the BTree index on `column_pos` of
8044/// `table_name` already has a row with this key.
8045fn on_conflict_key_exists(
8046    catalog: &Catalog,
8047    table_name: &str,
8048    column_pos: usize,
8049    key: &Value,
8050) -> bool {
8051    let Some(table) = catalog.get(table_name) else {
8052        return false;
8053    };
8054    let Some(idx_key) = spg_storage::IndexKey::from_value(key) else {
8055        return false;
8056    };
8057    table.indices().iter().any(|idx| {
8058        matches!(idx.kind, spg_storage::IndexKind::BTree(_))
8059            && idx.column_position == column_pos
8060            && idx.partial_predicate.is_none()
8061            && !idx.lookup_eq(&idx_key).is_empty()
8062    })
8063}
8064
8065/// v7.9.9 / v7.9.10 — look up an existing row's position by
8066/// matching all `column_positions` against the incoming `key`
8067/// tuple. Single-column shape (one column) reduces to the
8068/// canonical PK lookup; composite shapes scan linearly until
8069/// every position matches.
8070fn lookup_row_position_by_keys(
8071    catalog: &Catalog,
8072    table_name: &str,
8073    column_positions: &[usize],
8074    key: &[&Value],
8075) -> Option<usize> {
8076    let table = catalog.get(table_name)?;
8077    table.rows().iter().position(|r| {
8078        column_positions
8079            .iter()
8080            .enumerate()
8081            .all(|(i, &pos)| r.values.get(pos) == Some(key[i]))
8082    })
8083}
8084
8085/// v7.9.10 — does the table already contain a row whose
8086/// `column_positions` tuple equals `key`? Single-column shape
8087/// uses the existing BTree fast path; composite shapes fall
8088/// back to a row scan.
8089fn on_conflict_keys_exist(
8090    catalog: &Catalog,
8091    table_name: &str,
8092    column_positions: &[usize],
8093    key: &[&Value],
8094) -> bool {
8095    if column_positions.len() == 1 {
8096        return on_conflict_key_exists(catalog, table_name, column_positions[0], key[0]);
8097    }
8098    let Some(table) = catalog.get(table_name) else {
8099        return false;
8100    };
8101    table.rows().iter().any(|r| {
8102        column_positions
8103            .iter()
8104            .enumerate()
8105            .all(|(i, &pos)| r.values.get(pos) == Some(key[i]))
8106    })
8107}
8108
8109/// v7.9.9 — apply ON CONFLICT DO UPDATE SET assignments to an
8110/// existing row.
8111///
8112/// `incoming` is the rejected INSERT row (used to resolve
8113/// `EXCLUDED.col` references in the assignment exprs);
8114/// `target_pos` is the position of the existing row in the table.
8115/// Each assignment substitutes `EXCLUDED.col` with the matching
8116/// incoming value, evaluates the resulting expression against
8117/// the existing row, and writes the new value into the
8118/// corresponding column of the returned `Vec<Value>`. If
8119/// `where_` evaluates falsy, returns Ok(None) — PG behaviour:
8120/// the conflicting row is silently kept unchanged.
8121fn apply_on_conflict_assignments(
8122    catalog: &Catalog,
8123    table_name: &str,
8124    target_pos: usize,
8125    incoming: &[Value],
8126    assignments: &[(String, Expr)],
8127    where_: Option<&Expr>,
8128) -> Result<Option<Vec<Value>>, EngineError> {
8129    let table = catalog.get(table_name).ok_or_else(|| {
8130        EngineError::Storage(StorageError::TableNotFound {
8131            name: table_name.into(),
8132        })
8133    })?;
8134    let schema_cols = table.schema().columns.clone();
8135    let existing = table
8136        .rows()
8137        .get(target_pos)
8138        .ok_or_else(|| {
8139            EngineError::Unsupported(alloc::format!(
8140                "ON CONFLICT DO UPDATE: row position {target_pos} out of bounds on {table_name:?}"
8141            ))
8142        })?
8143        .clone();
8144    let ctx = eval::EvalContext::new(&schema_cols, Some(table_name));
8145    // Optional WHERE filter on the conflict row.
8146    if let Some(w) = where_ {
8147        let pred = w.clone();
8148        let pred = substitute_excluded_refs(pred, &schema_cols, incoming);
8149        let v = eval::eval_expr(&pred, &existing, &ctx)?;
8150        if !matches!(v, Value::Bool(true)) {
8151            return Ok(None);
8152        }
8153    }
8154    let mut new_values = existing.values.clone();
8155    for (col_name, expr) in assignments {
8156        let target_idx = schema_cols
8157            .iter()
8158            .position(|c| c.name == *col_name)
8159            .ok_or_else(|| {
8160                EngineError::Eval(EvalError::ColumnNotFound {
8161                    name: col_name.clone(),
8162                })
8163            })?;
8164        let sub = substitute_excluded_refs(expr.clone(), &schema_cols, incoming);
8165        let v = eval::eval_expr(&sub, &existing, &ctx)?;
8166        new_values[target_idx] = coerce_value(v, schema_cols[target_idx].ty, col_name, target_idx)?;
8167    }
8168    Ok(Some(new_values))
8169}
8170
8171/// v7.9.9 — walk an `Expr` tree replacing any `Column { qualifier:
8172/// "EXCLUDED", name }` reference with a `Literal` of the matching
8173/// value from the incoming-row vec. Resolution against the
8174/// child-table column list (by name).
8175fn substitute_excluded_refs(expr: Expr, schema_cols: &[ColumnSchema], incoming: &[Value]) -> Expr {
8176    use spg_sql::ast::ColumnName;
8177    match expr {
8178        Expr::Column(ColumnName { qualifier, name })
8179            if qualifier
8180                .as_deref()
8181                .is_some_and(|q| q.eq_ignore_ascii_case("excluded")) =>
8182        {
8183            let pos = schema_cols.iter().position(|c| c.name == name);
8184            match pos {
8185                Some(p) => {
8186                    let v = incoming.get(p).cloned().unwrap_or(Value::Null);
8187                    value_to_literal_expr(v)
8188                        .unwrap_or_else(|_| Expr::Literal(spg_sql::ast::Literal::Null))
8189                }
8190                None => Expr::Column(ColumnName { qualifier, name }),
8191            }
8192        }
8193        Expr::Binary { op, lhs, rhs } => Expr::Binary {
8194            op,
8195            lhs: Box::new(substitute_excluded_refs(*lhs, schema_cols, incoming)),
8196            rhs: Box::new(substitute_excluded_refs(*rhs, schema_cols, incoming)),
8197        },
8198        Expr::Unary { op, expr } => Expr::Unary {
8199            op,
8200            expr: Box::new(substitute_excluded_refs(*expr, schema_cols, incoming)),
8201        },
8202        Expr::FunctionCall { name, args } => Expr::FunctionCall {
8203            name,
8204            args: args
8205                .into_iter()
8206                .map(|a| substitute_excluded_refs(a, schema_cols, incoming))
8207                .collect(),
8208        },
8209        other => other,
8210    }
8211}
8212
// NOTE: the text below documents `enforce_fk_inserts` (defined later
// in this file), not the function that follows. It is kept as a plain
// comment so rustdoc does not attach it to
// `enforce_uniqueness_inserts`.
//
// v7.6.2 / v7.6.7 — INSERT-side FK enforcement. For every row
// about to be inserted into `child_table`, every FK declared on
// that table is checked: the row's FK columns must either be
// NULL (SQL spec skip) or match an existing parent row via the
// parent's BTree PK / UNIQUE index.
//
// Returns `EngineError::Unsupported` with a `FOREIGN KEY violation`
// payload on first failure.
//
// **Self-referencing FKs (v7.6.7 widening):** when `fk.parent_table
// == child_table`, the parent rows visible to this check are
//  (a) rows already committed to the table, plus
//  (b) earlier rows from the *same* `rows` batch.
// This makes `INSERT INTO tree VALUES (1, NULL), (2, 1), (3, 2)`
// work in a single statement — a common pattern for bulk-loading
// hierarchies.
8229/// v7.9.19 — enforce table-level UNIQUE / PRIMARY KEY tuple
8230/// constraints at INSERT time. For each constraint declared on
8231/// the target table, check that no existing row + no earlier row
8232/// in the same batch has the same full-column tuple. NULL in
8233/// any column lifts the row out of the check (SQL spec: NULL
8234/// ≠ NULL for uniqueness). mailrs G1 + G6.
8235fn enforce_uniqueness_inserts(
8236    catalog: &Catalog,
8237    child_table: &str,
8238    constraints: &[spg_storage::UniquenessConstraint],
8239    rows: &[Vec<Value>],
8240) -> Result<(), EngineError> {
8241    if constraints.is_empty() {
8242        return Ok(());
8243    }
8244    let table = catalog.get(child_table).ok_or_else(|| {
8245        EngineError::Storage(StorageError::TableNotFound {
8246            name: child_table.into(),
8247        })
8248    })?;
8249    for uc in constraints {
8250        for (batch_idx, row_values) in rows.iter().enumerate() {
8251            let key: Vec<&Value> = uc.columns.iter().map(|&i| &row_values[i]).collect();
8252            let has_null = key.iter().any(|v| matches!(v, Value::Null));
8253            if has_null {
8254                continue;
8255            }
8256            // Table-side collision: scan existing rows.
8257            let collides_in_table = table.rows().iter().any(|prow| {
8258                uc.columns
8259                    .iter()
8260                    .enumerate()
8261                    .all(|(i, &p)| prow.values.get(p) == Some(key[i]))
8262            });
8263            // Batch-side collision: earlier rows in the same INSERT.
8264            let collides_in_batch = rows[..batch_idx].iter().any(|earlier| {
8265                uc.columns
8266                    .iter()
8267                    .enumerate()
8268                    .all(|(i, &p)| earlier.get(p) == Some(key[i]))
8269            });
8270            if collides_in_table || collides_in_batch {
8271                let kind = if uc.is_primary_key {
8272                    "PRIMARY KEY"
8273                } else {
8274                    "UNIQUE"
8275                };
8276                let col_names: Vec<String> = uc
8277                    .columns
8278                    .iter()
8279                    .map(|&i| table.schema().columns[i].name.clone())
8280                    .collect();
8281                return Err(EngineError::Unsupported(alloc::format!(
8282                    "{kind} violation on {child_table:?} columns {col_names:?}: \
8283                     row #{batch_idx} duplicates an existing key"
8284                )));
8285            }
8286        }
8287    }
8288    Ok(())
8289}
8290
8291/// v7.9.29 — `true` iff `v` counts as a truthy SQL value for a
8292/// WHERE-style predicate. NULL → false (three-valued logic
8293/// collapses to "skip this row" for index inclusion). Numeric
8294/// non-zero, BIGINT non-zero, TINYINT non-zero, BOOLEAN true → true.
8295/// Everything else (strings, vectors, JSON, …) is not a valid
8296/// predicate result and surfaces as `false` so a malformed
8297/// predicate degrades to "row not in index" rather than panicking.
8298fn predicate_truthy(v: &spg_storage::Value) -> bool {
8299    use spg_storage::Value as V;
8300    match v {
8301        V::Bool(b) => *b,
8302        V::Int(n) => *n != 0,
8303        V::BigInt(n) => *n != 0,
8304        V::SmallInt(n) => *n != 0,
8305        _ => false,
8306    }
8307}
8308
8309/// v7.9.29 — at CREATE UNIQUE INDEX time, scan the table's
8310/// committed rows for pre-existing duplicates. If any pair of rows
8311/// matches the predicate AND has the same index key, refuse to
8312/// create the index so the user fixes the data before retrying.
8313fn check_existing_unique_violation(
8314    idx: &spg_storage::Index,
8315    schema: &spg_storage::TableSchema,
8316    rows: &[spg_storage::Row],
8317) -> Result<(), EngineError> {
8318    let predicate_expr = match idx.partial_predicate.as_deref() {
8319        Some(s) => Some(spg_sql::parser::parse_expression(s).map_err(|e| {
8320            EngineError::Unsupported(alloc::format!(
8321                "stored partial predicate {s:?} failed to re-parse: {e:?}"
8322            ))
8323        })?),
8324        None => None,
8325    };
8326    let ctx = eval::EvalContext::new(&schema.columns, None);
8327    let key_positions = unique_key_positions(idx);
8328    let mut seen: alloc::vec::Vec<alloc::vec::Vec<spg_storage::Value>> = alloc::vec::Vec::new();
8329    for row in rows {
8330        if let Some(expr) = &predicate_expr {
8331            let v = eval::eval_expr(expr, row, &ctx).map_err(|e| {
8332                EngineError::Unsupported(alloc::format!(
8333                    "evaluating UNIQUE INDEX predicate against existing row: {e:?}"
8334                ))
8335            })?;
8336            if !predicate_truthy(&v) {
8337                continue;
8338            }
8339        }
8340        let key: alloc::vec::Vec<spg_storage::Value> = key_positions
8341            .iter()
8342            .map(|&p| {
8343                row.values
8344                    .get(p)
8345                    .cloned()
8346                    .unwrap_or(spg_storage::Value::Null)
8347            })
8348            .collect();
8349        if key.iter().any(|v| matches!(v, spg_storage::Value::Null)) {
8350            continue;
8351        }
8352        if seen.iter().any(|other| *other == key) {
8353            return Err(EngineError::Unsupported(alloc::format!(
8354                "CREATE UNIQUE INDEX {:?}: existing rows already violate the constraint",
8355                idx.name
8356            )));
8357        }
8358        seen.push(key);
8359    }
8360    Ok(())
8361}
8362
8363/// v7.9.29 — full key tuple for a UNIQUE INDEX (leading +
8364/// extra positions). For single-column indexes this is just
8365/// `[column_position]`.
8366fn unique_key_positions(idx: &spg_storage::Index) -> alloc::vec::Vec<usize> {
8367    let mut out = alloc::vec::Vec::with_capacity(1 + idx.extra_column_positions.len());
8368    out.push(idx.column_position);
8369    out.extend_from_slice(&idx.extra_column_positions);
8370    out
8371}
8372
8373/// v7.9.29 — at INSERT time, walk every `is_unique` index on the
8374/// target table. For each, eval the index's optional predicate
8375/// against (a) the candidate row and (b) every committed row plus
8376/// earlier batch rows; only rows where the predicate is truthy
8377/// participate. A duplicate key among predicate-matching rows is a
8378/// uniqueness violation. NULL keys lift the row out of the check
8379/// (matching PG's "UNIQUE allows multiple NULLs" semantics).
8380fn enforce_unique_index_inserts(
8381    catalog: &Catalog,
8382    table_name: &str,
8383    rows: &[alloc::vec::Vec<spg_storage::Value>],
8384) -> Result<(), EngineError> {
8385    let table = catalog.get(table_name).ok_or_else(|| {
8386        EngineError::Storage(StorageError::TableNotFound {
8387            name: table_name.into(),
8388        })
8389    })?;
8390    let schema = table.schema();
8391    let ctx = eval::EvalContext::new(&schema.columns, None);
8392    for idx in table.indices() {
8393        if !idx.is_unique {
8394            continue;
8395        }
8396        // Re-parse the predicate once per index per batch.
8397        let predicate_expr = match idx.partial_predicate.as_deref() {
8398            Some(s) => Some(spg_sql::parser::parse_expression(s).map_err(|e| {
8399                EngineError::Unsupported(alloc::format!(
8400                    "UNIQUE INDEX {:?} predicate {s:?} failed to re-parse: {e:?}",
8401                    idx.name
8402                ))
8403            })?),
8404            None => None,
8405        };
8406        let key_positions = unique_key_positions(idx);
8407        let key_of = |values: &[spg_storage::Value]| -> alloc::vec::Vec<spg_storage::Value> {
8408            key_positions
8409                .iter()
8410                .map(|&p| values.get(p).cloned().unwrap_or(spg_storage::Value::Null))
8411                .collect()
8412        };
8413        // Helper: does `values` participate in this index? (predicate
8414        // truthy when present.) Wraps `values` into a transient Row
8415        // because eval_expr requires &Row.
8416        let participates = |values: &[spg_storage::Value]| -> Result<bool, EngineError> {
8417            let Some(expr) = &predicate_expr else {
8418                return Ok(true);
8419            };
8420            let tmp_row = spg_storage::Row {
8421                values: values.to_vec(),
8422            };
8423            let v = eval::eval_expr(expr, &tmp_row, &ctx).map_err(|e| {
8424                EngineError::Unsupported(alloc::format!(
8425                    "UNIQUE INDEX {:?} predicate eval: {e:?}",
8426                    idx.name
8427                ))
8428            })?;
8429            Ok(predicate_truthy(&v))
8430        };
8431        for (batch_idx, row_values) in rows.iter().enumerate() {
8432            if !participates(row_values)? {
8433                continue;
8434            }
8435            let key = key_of(row_values);
8436            if key.iter().any(|v| matches!(v, spg_storage::Value::Null)) {
8437                continue;
8438            }
8439            // Committed-table collision.
8440            for prow in table.rows() {
8441                if !participates(&prow.values)? {
8442                    continue;
8443                }
8444                if key_of(&prow.values) == key {
8445                    return Err(EngineError::Unsupported(alloc::format!(
8446                        "UNIQUE INDEX {:?} violation on {table_name:?}: \
8447                         row #{batch_idx} duplicates an existing key",
8448                        idx.name
8449                    )));
8450                }
8451            }
8452            // Within-batch collision: earlier rows in the same INSERT.
8453            for earlier in &rows[..batch_idx] {
8454                if !participates(earlier)? {
8455                    continue;
8456                }
8457                if key_of(earlier) == key {
8458                    return Err(EngineError::Unsupported(alloc::format!(
8459                        "UNIQUE INDEX {:?} violation on {table_name:?}: \
8460                         row #{batch_idx} duplicates an earlier row in the same batch",
8461                        idx.name
8462                    )));
8463                }
8464            }
8465        }
8466    }
8467    Ok(())
8468}
8469
8470fn enforce_fk_inserts(
8471    catalog: &Catalog,
8472    child_table: &str,
8473    fks: &[spg_storage::ForeignKeyConstraint],
8474    rows: &[Vec<Value>],
8475) -> Result<(), EngineError> {
8476    for fk in fks {
8477        let parent_is_self = fk.parent_table == child_table;
8478        let parent = if parent_is_self {
8479            // Self-ref: read the current state of the same table.
8480            // The mut borrow on child has been dropped by the caller.
8481            catalog.get(child_table).ok_or_else(|| {
8482                EngineError::Storage(StorageError::TableNotFound {
8483                    name: child_table.into(),
8484                })
8485            })?
8486        } else {
8487            catalog.get(&fk.parent_table).ok_or_else(|| {
8488                EngineError::Storage(StorageError::TableNotFound {
8489                    name: fk.parent_table.clone(),
8490                })
8491            })?
8492        };
8493        for (batch_idx, row_values) in rows.iter().enumerate() {
8494            // Single-column FK fast path: try the parent's BTree
8495            // index for an O(log n) lookup. Composite FKs fall back
8496            // to a parent-row scan.
8497            if fk.local_columns.len() == 1 {
8498                let v = &row_values[fk.local_columns[0]];
8499                if matches!(v, Value::Null) {
8500                    continue;
8501                }
8502                let parent_col = fk.parent_columns[0];
8503                let key = spg_storage::IndexKey::from_value(v).ok_or_else(|| {
8504                    EngineError::Unsupported(alloc::format!(
8505                        "FOREIGN KEY column value of type {:?} is not index-eligible",
8506                        v.data_type()
8507                    ))
8508                })?;
8509                let present_committed = parent.indices().iter().any(|idx| {
8510                    matches!(idx.kind, spg_storage::IndexKind::BTree(_))
8511                        && idx.column_position == parent_col
8512                        && idx.partial_predicate.is_none()
8513                        && !idx.lookup_eq(&key).is_empty()
8514                });
8515                // v7.6.7 self-ref widening: also accept a match
8516                // against earlier rows in this same batch when the
8517                // FK points at the table being inserted into.
8518                let present_in_batch = parent_is_self
8519                    && rows[..batch_idx]
8520                        .iter()
8521                        .any(|earlier| earlier.get(parent_col) == Some(v));
8522                if !(present_committed || present_in_batch) {
8523                    return Err(EngineError::Unsupported(alloc::format!(
8524                        "FOREIGN KEY violation: no parent row in {:?} where {} = {:?}",
8525                        fk.parent_table,
8526                        parent
8527                            .schema()
8528                            .columns
8529                            .get(parent_col)
8530                            .map_or("?", |c| c.name.as_str()),
8531                        v,
8532                    )));
8533                }
8534            } else {
8535                // Composite FK: scan parent rows. v7.6.7 also
8536                // accepts a match against earlier rows in the same
8537                // batch (self-ref bulk-loading of hierarchies).
8538                if fk
8539                    .local_columns
8540                    .iter()
8541                    .all(|&i| matches!(row_values.get(i), Some(Value::Null)))
8542                {
8543                    continue;
8544                }
8545                let local: Vec<&Value> = fk.local_columns.iter().map(|&i| &row_values[i]).collect();
8546                let parent_match_committed = parent.rows().iter().any(|prow| {
8547                    fk.parent_columns
8548                        .iter()
8549                        .enumerate()
8550                        .all(|(i, &pi)| prow.values.get(pi) == Some(local[i]))
8551                });
8552                let parent_match_in_batch = parent_is_self
8553                    && rows[..batch_idx].iter().any(|earlier| {
8554                        fk.parent_columns
8555                            .iter()
8556                            .enumerate()
8557                            .all(|(i, &pi)| earlier.get(pi) == Some(local[i]))
8558                    });
8559                if !(parent_match_committed || parent_match_in_batch) {
8560                    return Err(EngineError::Unsupported(alloc::format!(
8561                        "FOREIGN KEY violation: no parent row in {:?} matching composite key",
8562                        fk.parent_table,
8563                    )));
8564                }
8565            }
8566        }
8567    }
8568    Ok(())
8569}
8570
8571/// v7.6.4 / v7.6.5 — one step of the FK action plan computed for a
8572/// DELETE on a parent. The plan is a list of these steps, stacked
8573/// across the FK graph by `plan_fk_parent_deletions`.
8574#[derive(Debug, Clone)]
8575struct FkChildStep {
8576    child_table: String,
8577    action: FkChildAction,
8578}
8579
8580#[derive(Debug, Clone)]
8581enum FkChildAction {
8582    /// CASCADE — remove these rows. Sorted, deduplicated positions.
8583    Delete { positions: Vec<usize> },
8584    /// SET NULL — for each (row, column) in the flat list, write
8585    /// NULL into that child cell. Multiple FKs on the same row may
8586    /// produce overlapping entries (deduped at plan time).
8587    SetNull {
8588        positions: Vec<usize>,
8589        columns: Vec<usize>,
8590    },
8591    /// SET DEFAULT — same shape as SetNull but writes the column's
8592    /// declared DEFAULT value (resolved at plan time). Columns
8593    /// without a DEFAULT raise an error during planning.
8594    SetDefault {
8595        positions: Vec<usize>,
8596        columns: Vec<usize>,
8597        defaults: Vec<Value>,
8598    },
8599}
8600
8601/// v7.6.3 → v7.6.5 — plan FK fallout for a DELETE on a parent table.
8602///
8603/// Walks every table in the catalog looking for FKs whose
8604/// `parent_table` is `parent_table_name`. For each such FK + each
8605/// to-be-deleted parent row:
8606///
8607///   - RESTRICT / NoAction → error, no plan returned
8608///   - CASCADE → child rows get scheduled for deletion; recursive
8609///   - SetNull → child FK column(s) scheduled to be NULL-ed.
8610///     Verified NULL-able at plan time.
8611///   - SetDefault → child FK column(s) scheduled to be reset to
8612///     their declared DEFAULT. Columns without a DEFAULT raise.
8613///
8614/// SET NULL / SET DEFAULT do NOT cascade further — the child row
8615/// stays; only one of its columns mutates.
8616fn plan_fk_parent_deletions(
8617    catalog: &Catalog,
8618    parent_table_name: &str,
8619    to_delete_positions: &[usize],
8620    to_delete_rows: &[Vec<Value>],
8621) -> Result<Vec<FkChildStep>, EngineError> {
8622    use alloc::collections::{BTreeMap, BTreeSet};
8623    if to_delete_rows.is_empty() {
8624        return Ok(Vec::new());
8625    }
8626    let mut delete_plan: BTreeMap<String, BTreeSet<usize>> = BTreeMap::new();
8627    // setnull / setdefault keyed by child_table → (row_idx, col_idx) → optional default
8628    let mut setnull_plan: BTreeMap<String, BTreeSet<(usize, usize)>> = BTreeMap::new();
8629    let mut setdefault_plan: BTreeMap<String, BTreeMap<(usize, usize), Value>> = BTreeMap::new();
8630    let mut visited: BTreeSet<(String, usize)> = BTreeSet::new();
8631    for &p in to_delete_positions {
8632        visited.insert((parent_table_name.to_string(), p));
8633    }
8634    let mut work: Vec<(String, Vec<Value>)> = to_delete_rows
8635        .iter()
8636        .map(|r| (parent_table_name.to_string(), r.clone()))
8637        .collect();
8638    while let Some((cur_parent, parent_row)) = work.pop() {
8639        for child_name in catalog.table_names() {
8640            let child = catalog
8641                .get(&child_name)
8642                .expect("table_names → catalog.get round-trip is total");
8643            for fk in &child.schema().foreign_keys {
8644                if fk.parent_table != cur_parent {
8645                    continue;
8646                }
8647                let parent_key: Vec<&Value> = fk
8648                    .parent_columns
8649                    .iter()
8650                    .map(|&pi| &parent_row[pi])
8651                    .collect();
8652                if parent_key.iter().any(|v| matches!(v, Value::Null)) {
8653                    continue;
8654                }
8655                for (child_row_idx, child_row) in child.rows().iter().enumerate() {
8656                    if child_name == cur_parent
8657                        && visited.contains(&(child_name.clone(), child_row_idx))
8658                    {
8659                        continue;
8660                    }
8661                    let matches_key = fk
8662                        .local_columns
8663                        .iter()
8664                        .enumerate()
8665                        .all(|(i, &li)| child_row.values.get(li) == Some(parent_key[i]));
8666                    if !matches_key {
8667                        continue;
8668                    }
8669                    match fk.on_delete {
8670                        spg_storage::FkAction::Restrict | spg_storage::FkAction::NoAction => {
8671                            return Err(EngineError::Unsupported(alloc::format!(
8672                                "FOREIGN KEY violation: DELETE on {cur_parent:?} is \
8673                                 restricted by FK from {child_name:?}.{:?}",
8674                                fk.local_columns,
8675                            )));
8676                        }
8677                        spg_storage::FkAction::Cascade => {
8678                            if visited.insert((child_name.clone(), child_row_idx)) {
8679                                delete_plan
8680                                    .entry(child_name.clone())
8681                                    .or_default()
8682                                    .insert(child_row_idx);
8683                                work.push((child_name.clone(), child_row.values.clone()));
8684                            }
8685                        }
8686                        spg_storage::FkAction::SetNull => {
8687                            // Verify every local FK column is NULL-able.
8688                            for &li in &fk.local_columns {
8689                                let col = child.schema().columns.get(li).ok_or_else(|| {
8690                                    EngineError::Unsupported(alloc::format!(
8691                                        "FK local column {li} missing in {child_name:?}"
8692                                    ))
8693                                })?;
8694                                if !col.nullable {
8695                                    return Err(EngineError::Unsupported(alloc::format!(
8696                                        "FOREIGN KEY ON DELETE SET NULL: column \
8697                                         {child_name:?}.{:?} is NOT NULL — cannot SET NULL",
8698                                        col.name,
8699                                    )));
8700                                }
8701                            }
8702                            let entry = setnull_plan.entry(child_name.clone()).or_default();
8703                            for &li in &fk.local_columns {
8704                                entry.insert((child_row_idx, li));
8705                            }
8706                        }
8707                        spg_storage::FkAction::SetDefault => {
8708                            // Resolve the DEFAULT for every local FK col.
8709                            let entry = setdefault_plan.entry(child_name.clone()).or_default();
8710                            for &li in &fk.local_columns {
8711                                let col = child.schema().columns.get(li).ok_or_else(|| {
8712                                    EngineError::Unsupported(alloc::format!(
8713                                        "FK local column {li} missing in {child_name:?}"
8714                                    ))
8715                                })?;
8716                                let default = col.default.clone().ok_or_else(|| {
8717                                    EngineError::Unsupported(alloc::format!(
8718                                        "FOREIGN KEY ON DELETE SET DEFAULT: column \
8719                                         {child_name:?}.{:?} has no DEFAULT declared",
8720                                        col.name,
8721                                    ))
8722                                })?;
8723                                entry.insert((child_row_idx, li), default);
8724                            }
8725                        }
8726                    }
8727                }
8728            }
8729        }
8730    }
8731    // Flatten the three plans into the ordered `FkChildStep` list.
8732    // Deletes are applied last per child (after any null/default
8733    // re-writes on the same child) so a child row that's both
8734    // re-written and then cascade-deleted only ends up deleted —
8735    // but in v7.6.5 SetNull/Cascade never overlap on the same row
8736    // (a single FK chooses exactly one action), so the order is
8737    // mostly a precaution.
8738    let mut steps: Vec<FkChildStep> = Vec::new();
8739    for (child_table, entries) in setnull_plan {
8740        let (positions, columns): (Vec<usize>, Vec<usize>) = entries.into_iter().unzip();
8741        steps.push(FkChildStep {
8742            child_table,
8743            action: FkChildAction::SetNull { positions, columns },
8744        });
8745    }
8746    for (child_table, entries) in setdefault_plan {
8747        let mut positions = Vec::with_capacity(entries.len());
8748        let mut columns = Vec::with_capacity(entries.len());
8749        let mut defaults = Vec::with_capacity(entries.len());
8750        for ((p, c), v) in entries {
8751            positions.push(p);
8752            columns.push(c);
8753            defaults.push(v);
8754        }
8755        steps.push(FkChildStep {
8756            child_table,
8757            action: FkChildAction::SetDefault {
8758                positions,
8759                columns,
8760                defaults,
8761            },
8762        });
8763    }
8764    for (child_table, positions) in delete_plan {
8765        steps.push(FkChildStep {
8766            child_table,
8767            action: FkChildAction::Delete {
8768                positions: positions.into_iter().collect(),
8769            },
8770        });
8771    }
8772    Ok(steps)
8773}
8774
8775/// v7.6.6 — plan FK fallout for an UPDATE that mutates parent-side
8776/// PK/UNIQUE columns. Walks every other table whose FK references
8777/// `parent_table_name`; for each FK whose parent_columns overlap a
8778/// mutated column, decides the action by `fk.on_update`.
8779///
8780///   - RESTRICT / NoAction → error if any child references the OLD
8781///     value
8782///   - CASCADE → child FK columns get rewritten to the NEW parent
8783///     value (a SetNull-style update step with the new value)
8784///   - SetNull → child FK columns set to NULL
8785///   - SetDefault → child FK columns set to declared default
8786///
8787/// `plan_with_old` is `(row_position, old_values, new_values)` so
8788/// the planner can detect "did this row's parent key actually
8789/// change?" — only rows where at least one referenced parent
8790/// column moved trigger inbound work.
8791fn plan_fk_parent_updates(
8792    catalog: &Catalog,
8793    parent_table_name: &str,
8794    plan_with_old: &[(usize, Vec<Value>, Vec<Value>)],
8795) -> Result<Vec<FkChildStep>, EngineError> {
8796    use alloc::collections::BTreeMap;
8797    if plan_with_old.is_empty() {
8798        return Ok(Vec::new());
8799    }
8800    // For each child table we may touch, build per-child step
8801    // lists. UPDATE never deletes children — `delete_plan` stays
8802    // empty here but is kept structurally aligned with
8803    // `plan_fk_parent_deletions` for future use.
8804    let delete_plan: BTreeMap<String, alloc::collections::BTreeSet<usize>> = BTreeMap::new();
8805    let mut setnull_plan: BTreeMap<String, alloc::collections::BTreeSet<(usize, usize)>> =
8806        BTreeMap::new();
8807    let mut setdefault_plan: BTreeMap<String, BTreeMap<(usize, usize), Value>> = BTreeMap::new();
8808    // Cascade-update plan: child_table → row_idx → col_idx → new_value
8809    let mut cascade_plan: BTreeMap<String, BTreeMap<(usize, usize), Value>> = BTreeMap::new();
8810
8811    for child_name in catalog.table_names() {
8812        let child = catalog
8813            .get(&child_name)
8814            .expect("table_names → catalog.get total");
8815        for fk in &child.schema().foreign_keys {
8816            if fk.parent_table != parent_table_name {
8817                continue;
8818            }
8819            for (_pos, old_row, new_row) in plan_with_old {
8820                // Did any parent FK column change?
8821                let key_changed = fk
8822                    .parent_columns
8823                    .iter()
8824                    .any(|&pi| old_row.get(pi) != new_row.get(pi));
8825                if !key_changed {
8826                    continue;
8827                }
8828                // The OLD parent key — used to find referring children.
8829                let old_key: Vec<&Value> =
8830                    fk.parent_columns.iter().map(|&pi| &old_row[pi]).collect();
8831                if old_key.iter().any(|v| matches!(v, Value::Null)) {
8832                    // NULL parent has no children — skip.
8833                    continue;
8834                }
8835                let new_key: Vec<&Value> =
8836                    fk.parent_columns.iter().map(|&pi| &new_row[pi]).collect();
8837                for (child_row_idx, child_row) in child.rows().iter().enumerate() {
8838                    // Self-ref same-row updates: a row updating its
8839                    // own PK doesn't restrict itself.
8840                    if child_name == parent_table_name
8841                        && plan_with_old.iter().any(|(p, _, _)| *p == child_row_idx)
8842                    {
8843                        continue;
8844                    }
8845                    let matches_key = fk
8846                        .local_columns
8847                        .iter()
8848                        .enumerate()
8849                        .all(|(i, &li)| child_row.values.get(li) == Some(old_key[i]));
8850                    if !matches_key {
8851                        continue;
8852                    }
8853                    match fk.on_update {
8854                        spg_storage::FkAction::Restrict | spg_storage::FkAction::NoAction => {
8855                            return Err(EngineError::Unsupported(alloc::format!(
8856                                "FOREIGN KEY violation: UPDATE on {parent_table_name:?} PK is \
8857                                 restricted by FK from {child_name:?}.{:?}",
8858                                fk.local_columns,
8859                            )));
8860                        }
8861                        spg_storage::FkAction::Cascade => {
8862                            // Rewrite child FK columns to new key.
8863                            let entry = cascade_plan.entry(child_name.clone()).or_default();
8864                            for (i, &li) in fk.local_columns.iter().enumerate() {
8865                                entry.insert((child_row_idx, li), new_key[i].clone());
8866                            }
8867                        }
8868                        spg_storage::FkAction::SetNull => {
8869                            for &li in &fk.local_columns {
8870                                let col = child.schema().columns.get(li).ok_or_else(|| {
8871                                    EngineError::Unsupported(alloc::format!(
8872                                        "FK local column {li} missing in {child_name:?}"
8873                                    ))
8874                                })?;
8875                                if !col.nullable {
8876                                    return Err(EngineError::Unsupported(alloc::format!(
8877                                        "FOREIGN KEY ON UPDATE SET NULL: column \
8878                                         {child_name:?}.{:?} is NOT NULL",
8879                                        col.name,
8880                                    )));
8881                                }
8882                            }
8883                            let entry = setnull_plan.entry(child_name.clone()).or_default();
8884                            for &li in &fk.local_columns {
8885                                entry.insert((child_row_idx, li));
8886                            }
8887                        }
8888                        spg_storage::FkAction::SetDefault => {
8889                            let entry = setdefault_plan.entry(child_name.clone()).or_default();
8890                            for &li in &fk.local_columns {
8891                                let col = child.schema().columns.get(li).ok_or_else(|| {
8892                                    EngineError::Unsupported(alloc::format!(
8893                                        "FK local column {li} missing in {child_name:?}"
8894                                    ))
8895                                })?;
8896                                let default = col.default.clone().ok_or_else(|| {
8897                                    EngineError::Unsupported(alloc::format!(
8898                                        "FOREIGN KEY ON UPDATE SET DEFAULT: column \
8899                                         {child_name:?}.{:?} has no DEFAULT",
8900                                        col.name,
8901                                    ))
8902                                })?;
8903                                entry.insert((child_row_idx, li), default);
8904                            }
8905                        }
8906                    }
8907                }
8908            }
8909        }
8910    }
8911    // Flatten into FkChildStep list. UPDATE doesn't produce
8912    // DeleteSteps (CASCADE on UPDATE just rewrites FK values).
8913    let mut steps: Vec<FkChildStep> = Vec::new();
8914    for (child_table, entries) in cascade_plan {
8915        let mut positions = Vec::with_capacity(entries.len());
8916        let mut columns = Vec::with_capacity(entries.len());
8917        let mut defaults = Vec::with_capacity(entries.len());
8918        for ((p, c), v) in entries {
8919            positions.push(p);
8920            columns.push(c);
8921            defaults.push(v);
8922        }
8923        // We reuse `FkChildAction::SetDefault` for cascade-update:
8924        // both shapes are "write a known value into specific cells"
8925        // — `apply_per_cell_writes` doesn't care whether the value
8926        // came from a DEFAULT declaration or a new parent key.
8927        steps.push(FkChildStep {
8928            child_table,
8929            action: FkChildAction::SetDefault {
8930                positions,
8931                columns,
8932                defaults,
8933            },
8934        });
8935    }
8936    for (child_table, entries) in setnull_plan {
8937        let (positions, columns): (Vec<usize>, Vec<usize>) = entries.into_iter().unzip();
8938        steps.push(FkChildStep {
8939            child_table,
8940            action: FkChildAction::SetNull { positions, columns },
8941        });
8942    }
8943    for (child_table, entries) in setdefault_plan {
8944        let mut positions = Vec::with_capacity(entries.len());
8945        let mut columns = Vec::with_capacity(entries.len());
8946        let mut defaults = Vec::with_capacity(entries.len());
8947        for ((p, c), v) in entries {
8948            positions.push(p);
8949            columns.push(c);
8950            defaults.push(v);
8951        }
8952        steps.push(FkChildStep {
8953            child_table,
8954            action: FkChildAction::SetDefault {
8955                positions,
8956                columns,
8957                defaults,
8958            },
8959        });
8960    }
8961    let _ = delete_plan; // UPDATE never deletes children.
8962    Ok(steps)
8963}
8964
8965/// v7.6.5 — apply one FK child step to the catalog. Encapsulates
8966/// the three action variants so the DELETE executor stays a
8967/// simple loop over the planned steps.
8968fn apply_fk_child_step(catalog: &mut Catalog, step: &FkChildStep) -> Result<(), EngineError> {
8969    let child = catalog.get_mut(&step.child_table).ok_or_else(|| {
8970        EngineError::Storage(StorageError::TableNotFound {
8971            name: step.child_table.clone(),
8972        })
8973    })?;
8974    match &step.action {
8975        FkChildAction::Delete { positions } => {
8976            let _ = child.delete_rows(positions);
8977        }
8978        FkChildAction::SetNull { positions, columns } => {
8979            apply_per_cell_writes(child, positions, columns, |_| Value::Null)?;
8980        }
8981        FkChildAction::SetDefault {
8982            positions,
8983            columns,
8984            defaults,
8985        } => {
8986            apply_per_cell_writes(child, positions, columns, |i| defaults[i].clone())?;
8987        }
8988    }
8989    Ok(())
8990}
8991
8992/// v7.6.5 — write new values into selected child cells via
8993/// `Table::update_row` (the catalog's existing UPDATE entry).
8994/// Groups writes by row position so multi-column updates on the
8995/// same row only call `update_row` once. `value_for(i)` produces
8996/// the new value for the i-th (position, column) entry.
8997fn apply_per_cell_writes(
8998    child: &mut spg_storage::Table,
8999    positions: &[usize],
9000    columns: &[usize],
9001    mut value_for: impl FnMut(usize) -> Value,
9002) -> Result<(), EngineError> {
9003    use alloc::collections::BTreeMap;
9004    let mut by_row: BTreeMap<usize, Vec<(usize, Value)>> = BTreeMap::new();
9005    for i in 0..positions.len() {
9006        by_row
9007            .entry(positions[i])
9008            .or_default()
9009            .push((columns[i], value_for(i)));
9010    }
9011    for (pos, mutations) in by_row {
9012        let mut new_values = child.rows()[pos].values.clone();
9013        for (col, v) in mutations {
9014            if let Some(slot) = new_values.get_mut(col) {
9015                *slot = v;
9016            }
9017        }
9018        child
9019            .update_row(pos, new_values)
9020            .map_err(EngineError::Storage)?;
9021    }
9022    Ok(())
9023}
9024
9025fn fk_action_sql_to_storage(a: spg_sql::ast::FkAction) -> spg_storage::FkAction {
9026    match a {
9027        spg_sql::ast::FkAction::Restrict => spg_storage::FkAction::Restrict,
9028        spg_sql::ast::FkAction::Cascade => spg_storage::FkAction::Cascade,
9029        spg_sql::ast::FkAction::SetNull => spg_storage::FkAction::SetNull,
9030        spg_sql::ast::FkAction::SetDefault => spg_storage::FkAction::SetDefault,
9031        spg_sql::ast::FkAction::NoAction => spg_storage::FkAction::NoAction,
9032    }
9033}
9034
9035/// v7.9.21 — resolve a column's DEFAULT for INSERT-time
9036/// default-fill. Free fn (rather than `&self`) so callers
9037/// with an active `&mut Table` borrow can still use it.
9038/// Literal defaults take the cached path (`col.default`);
9039/// runtime defaults hit `clock_fn` at each call. mailrs G4.
9040fn resolve_column_default_free(
9041    col: &ColumnSchema,
9042    clock_fn: Option<ClockFn>,
9043) -> Result<Value, EngineError> {
9044    if let Some(rt) = &col.runtime_default {
9045        return eval_runtime_default_free(rt, col.ty, clock_fn);
9046    }
9047    Ok(col.default.clone().unwrap_or(Value::Null))
9048}
9049
9050fn eval_runtime_default_free(
9051    rt: &str,
9052    ty: DataType,
9053    clock_fn: Option<ClockFn>,
9054) -> Result<Value, EngineError> {
9055    let s = rt.trim().to_ascii_lowercase();
9056    let canonical = s.trim_end_matches("()");
9057    let now_us = match clock_fn {
9058        Some(f) => f(),
9059        None => 0,
9060    };
9061    let v = match canonical {
9062        "now" | "current_timestamp" | "localtimestamp" => Value::Timestamp(now_us),
9063        "current_date" => Value::Date((now_us / 86_400_000_000) as i32),
9064        "current_time" | "localtime" => Value::Timestamp(now_us),
9065        other => {
9066            return Err(EngineError::Unsupported(alloc::format!(
9067                "runtime DEFAULT expression {other:?} not supported \
9068                 (v7.9.21 whitelist: now() / current_timestamp / \
9069                 current_date / current_time / localtimestamp / \
9070                 localtime)"
9071            )));
9072        }
9073    };
9074    coerce_value(v, ty, "DEFAULT", 0)
9075}
9076
9077/// v7.9.21 — true when a DEFAULT expression needs INSERT-time
9078/// evaluation rather than being cacheable as a literal Value.
9079/// FunctionCall is the immediate case (`now()`,
9080/// `current_timestamp`). Literal expressions and simple sign-
9081/// flipped numerics still take the static-cache path.
9082fn is_runtime_default_expr(expr: &Expr) -> bool {
9083    match expr {
9084        Expr::FunctionCall { .. } => true,
9085        Expr::Unary { expr, .. } => is_runtime_default_expr(expr),
9086        _ => false,
9087    }
9088}
9089
9090fn column_def_to_schema(c: ColumnDef) -> Result<ColumnSchema, EngineError> {
9091    let ty = column_type_to_data_type(c.ty);
9092    let mut schema = ColumnSchema::new(c.name.clone(), ty, c.nullable);
9093    if let Some(default_expr) = c.default {
9094        // v7.9.21 — distinguish literal defaults (evaluated once
9095        // at CREATE TABLE) from expression defaults (deferred to
9096        // INSERT). Function calls (`now()`, `current_timestamp`
9097        // — see v7.9.20 keyword promotion) take the runtime path.
9098        // Literals continue to cache. mailrs G4.
9099        if is_runtime_default_expr(&default_expr) {
9100            let display = alloc::format!("{default_expr}");
9101            schema = schema.with_runtime_default(display);
9102        } else {
9103            let raw = literal_expr_to_value(default_expr)?;
9104            let coerced = coerce_value(raw, ty, &c.name, 0)?;
9105            schema = schema.with_default(coerced);
9106        }
9107    }
9108    if c.auto_increment {
9109        // AUTO_INCREMENT only makes sense on integer-shaped columns.
9110        if !matches!(ty, DataType::SmallInt | DataType::Int | DataType::BigInt) {
9111            return Err(EngineError::Unsupported(alloc::format!(
9112                "AUTO_INCREMENT requires an integer column type, got {ty:?}"
9113            )));
9114        }
9115        schema = schema.with_auto_increment();
9116    }
9117    Ok(schema)
9118}
9119
/// v7.10.4 — decode a BYTEA literal. Surrounding whitespace is
/// trimmed first. Accepts:
///   * `\xDEADBEEF` (case-insensitive hex; whitespace stripped)
///   * `Hello\000world` (backslash escape form; `\\` for literal
///     backslash, `\NNN` for a byte given as three octal digits)
///   * Anything else → raw UTF-8 bytes of the input (PG accepts this too).
///
/// A backslash that does not start a valid escape — including
/// `\NNN` where a digit is not octal (`\089`) or the value exceeds
/// 255 (`\400`) — is kept as a literal byte.
///
/// # Errors
///
/// Only the hex form can fail: `"odd-length hex literal"` when the
/// digit count is odd, `"invalid hex digit"` for a non-hex byte.
fn decode_bytea_literal(s: &str) -> Result<alloc::vec::Vec<u8>, &'static str> {
    /// Value of a three-digit octal escape body, or `None` when it is
    /// not exactly three octal digits or does not fit in one byte.
    fn octal_escape(digits: &[u8]) -> Option<u8> {
        if digits.len() != 3 || !digits.iter().all(|&d| (b'0'..=b'7').contains(&d)) {
            return None;
        }
        let value = digits
            .iter()
            .fold(0u32, |acc, &d| acc * 8 + u32::from(d - b'0'));
        if value <= 0xFF {
            Some(value as u8)
        } else {
            None
        }
    }

    let s = s.trim();
    if let Some(hex) = s.strip_prefix("\\x").or_else(|| s.strip_prefix("\\X")) {
        // Hex form. Each pair of hex digits → one byte.
        let cleaned: alloc::string::String = hex.chars().filter(|c| !c.is_whitespace()).collect();
        let digits = cleaned.as_bytes();
        if digits.len() % 2 != 0 {
            return Err("odd-length hex literal");
        }
        let mut out = alloc::vec::Vec::with_capacity(digits.len() / 2);
        for pair in digits.chunks_exact(2) {
            out.push((hex_nibble(pair[0])? << 4) | hex_nibble(pair[1])?);
        }
        return Ok(out);
    }

    // Escape form or raw. Walk byte-by-byte; `\\` and `\NNN` octal
    // sequences decode; anything else is a literal byte.
    let bytes = s.as_bytes();
    let mut out = alloc::vec::Vec::with_capacity(bytes.len());
    let mut i = 0;
    while i < bytes.len() {
        if bytes[i] == b'\\' {
            if bytes.get(i + 1) == Some(&b'\\') {
                out.push(b'\\');
                i += 2;
                continue;
            }
            // Only genuine octal digits (0-7) form an escape;
            // accepting 8 or 9 would decode to an unrelated byte.
            if let Some(byte) = bytes.get(i + 1..i + 4).and_then(octal_escape) {
                out.push(byte);
                i += 4;
                continue;
            }
        }
        out.push(bytes[i]);
        i += 1;
    }
    Ok(out)
}

/// Numeric value (0-15) of one ASCII hex digit, either case; any
/// other byte is rejected with `"invalid hex digit"`.
fn hex_nibble(b: u8) -> Result<u8, &'static str> {
    match b {
        b'0'..=b'9' => Ok(b - b'0'),
        b'a'..=b'f' => Ok(b - b'a' + 10),
        b'A'..=b'F' => Ok(b - b'A' + 10),
        _ => Err("invalid hex digit"),
    }
}
9183
// Documentation for `decode_text_array_literal` (defined further
// below), kept as a plain comment so rustdoc does not attach it to
// `array_literal_widen`:
//
// v7.10.11 — decode a PG TEXT[] external array form
// (`{a,b,NULL}` with optional double-quoted elements). The
// engine strips the leading/trailing `{`/`}` and splits at commas.
// Quoted elements (`"hello, world"`) preserve embedded commas;
// `\\` and `\"` decode to literal backslash / quote. Plain
// unquoted `NULL` (case-insensitive) maps to `None`.
9190/// v7.11.13 — pick the array type for `ARRAY[lit, …]` from the
9191/// element values. Single-element-type rules:
9192///   - all NULL / all Text → TextArray
9193///   - all Int (or Int+NULL) → IntArray
9194///   - any BigInt without Text → BigIntArray (widening)
9195///   - any Text → TextArray (fallback; non-string elements
9196///     render as text)
9197fn array_literal_widen(items: alloc::vec::Vec<Value>) -> Value {
9198    let mut has_text = false;
9199    let mut has_bigint = false;
9200    let mut has_int = false;
9201    for v in &items {
9202        match v {
9203            Value::Null => {}
9204            Value::Text(_) | Value::Json(_) => has_text = true,
9205            Value::BigInt(_) => has_bigint = true,
9206            Value::Int(_) | Value::SmallInt(_) => has_int = true,
9207            _ => has_text = true,
9208        }
9209    }
9210    if has_text || (!has_bigint && !has_int) {
9211        let out: alloc::vec::Vec<Option<alloc::string::String>> = items
9212            .into_iter()
9213            .map(|v| match v {
9214                Value::Null => None,
9215                Value::Text(s) | Value::Json(s) => Some(s),
9216                other => Some(alloc::format!("{other:?}")),
9217            })
9218            .collect();
9219        return Value::TextArray(out);
9220    }
9221    if has_bigint {
9222        let out: alloc::vec::Vec<Option<i64>> = items
9223            .into_iter()
9224            .map(|v| match v {
9225                Value::Null => None,
9226                Value::Int(n) => Some(i64::from(n)),
9227                Value::SmallInt(n) => Some(i64::from(n)),
9228                Value::BigInt(n) => Some(n),
9229                _ => unreachable!("widen: unexpected non-integer in BigInt path"),
9230            })
9231            .collect();
9232        return Value::BigIntArray(out);
9233    }
9234    let out: alloc::vec::Vec<Option<i32>> = items
9235        .into_iter()
9236        .map(|v| match v {
9237            Value::Null => None,
9238            Value::Int(n) => Some(n),
9239            Value::SmallInt(n) => Some(i32::from(n)),
9240            _ => unreachable!("widen: unexpected non-i32-compatible in Int path"),
9241        })
9242        .collect();
9243    Value::IntArray(out)
9244}
9245
/// Decode a PG `TEXT[]` external array literal such as `{a,b,NULL}`.
///
/// The input is trimmed, must be wrapped in `{`…`}`, and is split at
/// top-level commas. Elements come in two forms:
///
/// - **Quoted** (`"hello, world"`): embedded commas are preserved and a
///   backslash makes the following byte literal (so `\\` → `\`, `\"` → `"`).
/// - **Unquoted**: read up to the next comma, then whitespace-trimmed. The
///   bare token `NULL` (case-insensitive) maps to `None`.
///
/// An empty (or whitespace-only) body yields an empty vector. A trailing
/// comma (`{a,}`) produces a final empty-string element.
///
/// # Errors
///
/// Returns a static message when the braces are missing, a quoted element
/// is never closed, or something other than `,` follows a quoted element.
fn decode_text_array_literal(
    s: &str,
) -> Result<alloc::vec::Vec<Option<alloc::string::String>>, &'static str> {
    let inner = s
        .trim()
        .strip_prefix('{')
        .and_then(|x| x.strip_suffix('}'))
        .ok_or("TEXT[] literal must be enclosed in '{...}'")?;
    let mut out: alloc::vec::Vec<Option<alloc::string::String>> = alloc::vec::Vec::new();
    if inner.trim().is_empty() {
        return Ok(out);
    }
    let bytes = inner.as_bytes();
    let is_blank = |b: u8| b == b' ' || b == b'\t';
    let mut i = 0;
    // `i` never exceeds `bytes.len()`; the loop exits via the end-of-input
    // `break` below or an error return.
    loop {
        // Skip leading whitespace.
        while i < bytes.len() && is_blank(bytes[i]) {
            i += 1;
        }
        if i < bytes.len() && bytes[i] == b'"' {
            // Quoted element. Collect raw bytes rather than casting each
            // byte to `char`: a byte-wise cast would split multi-byte UTF-8
            // sequences into Latin-1 characters and corrupt non-ASCII text.
            i += 1; // open quote
            let mut buf: alloc::vec::Vec<u8> = alloc::vec::Vec::new();
            while i < bytes.len() && bytes[i] != b'"' {
                if bytes[i] == b'\\' && i + 1 < bytes.len() {
                    buf.push(bytes[i + 1]);
                    i += 2;
                } else {
                    buf.push(bytes[i]);
                    i += 1;
                }
            }
            if i >= bytes.len() {
                return Err("unterminated quoted element");
            }
            i += 1; // close quote
            // Only ASCII backslashes were dropped from a valid `&str`, so
            // this cannot fail in practice; surface an error, not a panic.
            let text = alloc::string::String::from_utf8(buf)
                .map_err(|_| "invalid UTF-8 in quoted element")?;
            out.push(Some(text));
        } else {
            // Unquoted element — read until next comma or end. Both ends of
            // the slice sit on ASCII bytes or string ends, so the slice is
            // always on a char boundary.
            let start = i;
            while i < bytes.len() && bytes[i] != b',' {
                i += 1;
            }
            let raw = inner[start..i].trim();
            if raw.eq_ignore_ascii_case("NULL") {
                out.push(None);
            } else {
                out.push(Some(alloc::string::ToString::to_string(raw)));
            }
        }
        // Skip whitespace, expect comma or end.
        while i < bytes.len() && is_blank(bytes[i]) {
            i += 1;
        }
        if i >= bytes.len() {
            break;
        }
        if bytes[i] != b',' {
            return Err("expected ',' between TEXT[] elements");
        }
        i += 1;
    }
    Ok(out)
}
9310
/// v7.10.11 — encode a TEXT[] back into the PG external array
/// form (`{a,b,NULL}`).
///
/// `None` elements become the bare token `NULL`. An element is
/// double-quoted (with `\\` / `\"` escapes) when it is empty, spells
/// `NULL` in any letter case, or contains a comma, brace, double quote,
/// backslash, or any whitespace character. Quoting on *all* whitespace —
/// not just space and tab — matters for round-tripping: an unquoted
/// element with a leading or trailing newline would otherwise be
/// whitespace-trimmed away when the literal is parsed back.
fn encode_text_array(items: &[Option<alloc::string::String>]) -> alloc::string::String {
    // Rough guess of 8 bytes per element plus the two braces.
    let mut out = alloc::string::String::with_capacity(2 + items.len() * 8);
    out.push('{');
    for (i, item) in items.iter().enumerate() {
        if i > 0 {
            out.push(',');
        }
        let Some(s) = item else {
            out.push_str("NULL");
            continue;
        };
        let needs_quote = s.is_empty()
            || s.eq_ignore_ascii_case("NULL")
            || s.chars()
                .any(|c| matches!(c, ',' | '{' | '}' | '"' | '\\') || c.is_whitespace());
        if needs_quote {
            out.push('"');
            for c in s.chars() {
                // Only the quote and the escape character need escaping.
                if c == '"' || c == '\\' {
                    out.push('\\');
                }
                out.push(c);
            }
            out.push('"');
        } else {
            out.push_str(s);
        }
    }
    out.push('}');
    out
}
9347
/// v7.10.4 — render BYTEA bytes in PG hex output format: a `\x`
/// prefix followed by two lowercase hex digits per byte. Used by the
/// Text-side round-trip and the wire layer's text-mode encoder.
fn encode_bytea_hex(b: &[u8]) -> alloc::string::String {
    let mut hex = alloc::string::String::with_capacity(2 + 2 * b.len());
    hex.push('\\');
    hex.push('x');
    for &octet in b {
        // High nibble first, then low nibble.
        for nibble in [octet >> 4, octet & 0x0F] {
            hex.push(hex_digit(nibble));
        }
    }
    hex
}

/// Map a nibble (`0..=15`) to its lowercase hex character; any larger
/// value maps to `'?'`.
const fn hex_digit(n: u8) -> char {
    if n < 10 {
        (b'0' + n) as char
    } else if n < 16 {
        (b'a' + (n - 10)) as char
    } else {
        '?'
    }
}
9370
9371const fn column_type_to_data_type(t: ColumnTypeName) -> DataType {
9372    match t {
9373        ColumnTypeName::SmallInt => DataType::SmallInt,
9374        ColumnTypeName::Int => DataType::Int,
9375        ColumnTypeName::BigInt => DataType::BigInt,
9376        ColumnTypeName::Float => DataType::Float,
9377        ColumnTypeName::Text => DataType::Text,
9378        ColumnTypeName::Varchar(n) => DataType::Varchar(n),
9379        ColumnTypeName::Char(n) => DataType::Char(n),
9380        ColumnTypeName::Bool => DataType::Bool,
9381        ColumnTypeName::Vector { dim, encoding } => DataType::Vector {
9382            dim,
9383            encoding: match encoding {
9384                SqlVecEncoding::F32 => VecEncoding::F32,
9385                SqlVecEncoding::Sq8 => VecEncoding::Sq8,
9386                SqlVecEncoding::F16 => VecEncoding::F16,
9387            },
9388        },
9389        ColumnTypeName::Numeric(precision, scale) => DataType::Numeric { precision, scale },
9390        ColumnTypeName::Date => DataType::Date,
9391        ColumnTypeName::Timestamp => DataType::Timestamp,
9392        ColumnTypeName::Timestamptz => DataType::Timestamptz,
9393        ColumnTypeName::Json => DataType::Json,
9394        ColumnTypeName::Jsonb => DataType::Jsonb,
9395        ColumnTypeName::Bytes => DataType::Bytes,
9396        ColumnTypeName::TextArray => DataType::TextArray,
9397        ColumnTypeName::IntArray => DataType::IntArray,
9398        ColumnTypeName::BigIntArray => DataType::BigIntArray,
9399    }
9400}
9401
9402/// Convert an INSERT VALUES expression to a storage Value. Supports literal
9403/// expressions, unary-minus over numeric literals, and pgvector-style
9404/// `'[..]'::vector` cast (v1.2). Anything more complex returns `Unsupported`.
9405fn literal_expr_to_value(expr: Expr) -> Result<Value, EngineError> {
9406    match expr {
9407        Expr::Literal(l) => Ok(literal_to_value(l)),
9408        Expr::Cast { expr, target } => {
9409            let inner_value = literal_expr_to_value(*expr)?;
9410            crate::eval::cast_value(inner_value, target).map_err(EngineError::Eval)
9411        }
9412        Expr::Unary {
9413            op: UnOp::Neg,
9414            expr,
9415        } => match *expr {
9416            Expr::Literal(Literal::Integer(n)) => {
9417                // Fold to i32 if it fits, else BigInt. Parser emits Integer(i64)
9418                // — overflow on negate of i64::MIN is the one edge case.
9419                let neg = n.checked_neg().ok_or_else(|| {
9420                    EngineError::Unsupported("integer literal overflow on negation".into())
9421                })?;
9422                Ok(int_value_for(neg))
9423            }
9424            Expr::Literal(Literal::Float(x)) => Ok(Value::Float(-x)),
9425            other => Err(EngineError::Unsupported(alloc::format!(
9426                "unary minus over non-literal expression: {other:?}"
9427            ))),
9428        },
9429        // v7.10.10 — `ARRAY[lit, lit, …]` constructor accepted at
9430        // INSERT-time. Each element must reduce to a Value through
9431        // `literal_expr_to_value`; NULL elements become `None`.
9432        // v7.11.13 — deduce shape from element values: all Int →
9433        // IntArray; any BigInt → BigIntArray (widening); any Text
9434        // → TextArray. Cast targets (`ARRAY[]::INT[]`) flow through
9435        // the outer Cast arm before reaching here and re-coerce.
9436        Expr::Array(items) => {
9437            let mut materialised: alloc::vec::Vec<Value> = alloc::vec::Vec::with_capacity(items.len());
9438            for elem in items {
9439                materialised.push(literal_expr_to_value(elem)?);
9440            }
9441            Ok(array_literal_widen(materialised))
9442        }
9443        other => Err(EngineError::Unsupported(alloc::format!(
9444            "non-literal INSERT value expression: {other:?}"
9445        ))),
9446    }
9447}
9448
9449fn literal_to_value(l: Literal) -> Value {
9450    match l {
9451        Literal::Integer(n) => int_value_for(n),
9452        Literal::Float(x) => Value::Float(x),
9453        Literal::String(s) => Value::Text(s),
9454        Literal::Bool(b) => Value::Bool(b),
9455        Literal::Null => Value::Null,
9456        Literal::Vector(v) => Value::Vector(v),
9457        Literal::Interval { months, micros, .. } => Value::Interval { months, micros },
9458    }
9459}
9460
9461/// Pick `Int` (`i32`) when the literal fits, else `BigInt`. `INT` vs `BIGINT`
9462/// columns will still enforce the right tag downstream — this is just the
9463/// default we synthesise from an unannotated integer literal.
9464fn int_value_for(n: i64) -> Value {
9465    if let Ok(small) = i32::try_from(n) {
9466        Value::Int(small)
9467    } else {
9468        Value::BigInt(n)
9469    }
9470}
9471
9472/// Widen / narrow `v` to fit `expected`. Numerics permit safe widening
9473/// (`Int → BigInt`, `Int/BigInt → Float`) and best-effort narrowing
9474/// (`BigInt → Int` succeeds only when the value fits in `i32`). Everything
9475/// else returns `TypeMismatch` carrying the column name for caller diagnostics.
9476/// `NULL` is always permitted; the nullability check happens later in storage.
9477#[allow(clippy::too_many_lines)]
9478fn coerce_value(
9479    v: Value,
9480    expected: DataType,
9481    col_name: &str,
9482    position: usize,
9483) -> Result<Value, EngineError> {
9484    if v.is_null() {
9485        return Ok(Value::Null);
9486    }
9487    let actual = v.data_type().expect("non-null");
9488    if actual == expected {
9489        return Ok(v);
9490    }
9491    let coerced = match (v, expected) {
9492        (Value::Int(n), DataType::BigInt) => Some(Value::BigInt(i64::from(n))),
9493        (Value::Int(n), DataType::Float) => Some(Value::Float(f64::from(n))),
9494        (Value::Int(n), DataType::SmallInt) => i16::try_from(n).ok().map(Value::SmallInt),
9495        (Value::Int(n), DataType::Numeric { precision, scale }) => Some(numeric_from_integer(
9496            i128::from(n),
9497            precision,
9498            scale,
9499            col_name,
9500        )?),
9501        (Value::SmallInt(n), DataType::Int) => Some(Value::Int(i32::from(n))),
9502        (Value::SmallInt(n), DataType::BigInt) => Some(Value::BigInt(i64::from(n))),
9503        (Value::SmallInt(n), DataType::Float) => Some(Value::Float(f64::from(n))),
9504        (Value::SmallInt(n), DataType::Numeric { precision, scale }) => Some(numeric_from_integer(
9505            i128::from(n),
9506            precision,
9507            scale,
9508            col_name,
9509        )?),
9510        (Value::BigInt(n), DataType::Int) => i32::try_from(n).ok().map(Value::Int),
9511        (Value::BigInt(n), DataType::SmallInt) => i16::try_from(n).ok().map(Value::SmallInt),
9512        #[allow(clippy::cast_precision_loss)]
9513        (Value::BigInt(n), DataType::Float) => Some(Value::Float(n as f64)),
9514        (Value::BigInt(n), DataType::Numeric { precision, scale }) => Some(numeric_from_integer(
9515            i128::from(n),
9516            precision,
9517            scale,
9518            col_name,
9519        )?),
9520        (Value::Float(x), DataType::Numeric { precision, scale }) => {
9521            Some(numeric_from_float(x, precision, scale, col_name)?)
9522        }
9523        // Text → DATE / TIMESTAMP: parse canonical text forms.
9524        (Value::Text(s), DataType::Date) => {
9525            let d = eval::parse_date_literal(&s).ok_or_else(|| {
9526                EngineError::Eval(EvalError::TypeMismatch {
9527                    detail: alloc::format!("cannot parse {s:?} as DATE for column `{col_name}`"),
9528                })
9529            })?;
9530            Some(Value::Date(d))
9531        }
9532        // v4.9: Text ↔ JSON coercion. No structural validation —
9533        // any text literal is accepted; the responsibility for
9534        // valid JSON lies with the producer.
9535        (Value::Text(s), DataType::Json | DataType::Jsonb) => Some(Value::Json(s)),
9536        (Value::Json(s), DataType::Text) => Some(Value::Text(s)),
9537        // v7.10.4 — Text → BYTEA. Decode PG-style literal forms:
9538        //   - Hex:    `\x48656c6c6f`  (case-insensitive hex pairs)
9539        //   - Escape: `Hello\\000world`  (backslash + octal triples)
9540        //   - Plain:  any string → raw UTF-8 bytes (PG also accepts)
9541        // Errors surface as TypeMismatch so the operator gets a
9542        // clear "this literal isn't a bytea literal" hint.
9543        (Value::Text(s), DataType::Bytes) => {
9544            let bytes = decode_bytea_literal(&s).map_err(|e| {
9545                EngineError::Eval(EvalError::TypeMismatch {
9546                    detail: alloc::format!(
9547                        "cannot parse {s:?} as BYTEA for column `{col_name}`: {e}"
9548                    ),
9549                })
9550            })?;
9551            Some(Value::Bytes(bytes))
9552        }
9553        // v7.10.4 — BYTEA → Text round-trip uses the PG hex
9554        // output (lowercase, `\x` prefix). Important when a
9555        // SELECT pulls a bytea cell through a Text column path.
9556        (Value::Bytes(b), DataType::Text) => Some(Value::Text(encode_bytea_hex(&b))),
9557        // v7.10.11 — Text → TEXT[]. Decode PG's external array
9558        // form `'{a,b,NULL}'`. NULL element token (case-insensitive)
9559        // is the literal `NULL`; everything else is a quoted or
9560        // unquoted text element. mailrs `'{label1,label2}'::TEXT[]`.
9561        (Value::Text(s), DataType::TextArray) => {
9562            let arr = decode_text_array_literal(&s).map_err(|e| {
9563                EngineError::Eval(EvalError::TypeMismatch {
9564                    detail: alloc::format!(
9565                        "cannot parse {s:?} as TEXT[] for column `{col_name}`: {e}"
9566                    ),
9567                })
9568            })?;
9569            Some(Value::TextArray(arr))
9570        }
9571        // v7.10.11 — TEXT[] → Text round-trip uses PG's
9572        // external array form (`{a,b,NULL}`). Lets a SELECT
9573        // pull an array column through any Text-side codepath.
9574        (Value::TextArray(items), DataType::Text) => Some(Value::Text(encode_text_array(&items))),
9575        (Value::Text(s), DataType::Timestamp | DataType::Timestamptz) => {
9576            let t = eval::parse_timestamp_literal(&s).ok_or_else(|| {
9577                EngineError::Eval(EvalError::TypeMismatch {
9578                    detail: alloc::format!(
9579                        "cannot parse {s:?} as TIMESTAMP for column `{col_name}`"
9580                    ),
9581                })
9582            })?;
9583            Some(Value::Timestamp(t))
9584        }
9585        // DATE ↔ TIMESTAMP convertibility (DATE → midnight,
9586        // TIMESTAMP → day truncation).
9587        (Value::Date(d), DataType::Timestamp | DataType::Timestamptz) => {
9588            Some(Value::Timestamp(i64::from(d) * 86_400_000_000))
9589        }
9590        // v7.9.21 — Value::Timestamp lands in either Timestamp
9591        // or Timestamptz columns; the on-disk layout is the
9592        // same i64 microseconds UTC.
9593        (Value::Timestamp(t), DataType::Timestamptz) => Some(Value::Timestamp(t)),
9594        (Value::Timestamp(t), DataType::Date) => {
9595            let days = t.div_euclid(86_400_000_000);
9596            i32::try_from(days).ok().map(Value::Date)
9597        }
9598        (
9599            Value::Numeric {
9600                scaled,
9601                scale: src_scale,
9602            },
9603            DataType::Numeric { precision, scale },
9604        ) => Some(numeric_rescale(
9605            scaled, src_scale, precision, scale, col_name,
9606        )?),
9607        #[allow(clippy::cast_precision_loss)]
9608        (Value::Numeric { scaled, scale }, DataType::Float) => {
9609            let mut div = 1.0_f64;
9610            for _ in 0..scale {
9611                div *= 10.0;
9612            }
9613            Some(Value::Float((scaled as f64) / div))
9614        }
9615        (Value::Numeric { scaled, scale }, DataType::Int) => {
9616            let truncated = numeric_truncate_to_integer(scaled, scale);
9617            i32::try_from(truncated).ok().map(Value::Int)
9618        }
9619        (Value::Numeric { scaled, scale }, DataType::BigInt) => {
9620            let truncated = numeric_truncate_to_integer(scaled, scale);
9621            i64::try_from(truncated).ok().map(Value::BigInt)
9622        }
9623        (Value::Numeric { scaled, scale }, DataType::SmallInt) => {
9624            let truncated = numeric_truncate_to_integer(scaled, scale);
9625            i16::try_from(truncated).ok().map(Value::SmallInt)
9626        }
9627        // VARCHAR(n) enforces an upper bound on character count.
9628        (Value::Text(s), DataType::Varchar(max)) => {
9629            if u32::try_from(s.chars().count()).unwrap_or(u32::MAX) <= max {
9630                Some(Value::Text(s))
9631            } else {
9632                return Err(EngineError::Unsupported(alloc::format!(
9633                    "value for VARCHAR({max}) column `{col_name}` exceeds length: \
9634                     {} chars",
9635                    s.chars().count()
9636                )));
9637            }
9638        }
9639        // v6.0.1: f32 → SQ8 INSERT-time quantisation. Triggered
9640        // when the column declares `VECTOR(N) USING SQ8` and
9641        // the INSERT VALUES expression yields a raw f32 vector
9642        // (the normal pgvector-shape literal). Dim mismatch
9643        // falls through the `_ => None` arm and surfaces as
9644        // `TypeMismatch` with the expected SQ8 column type —
9645        // matching the F32 path's existing error.
9646        (
9647            Value::Vector(v),
9648            DataType::Vector {
9649                dim,
9650                encoding: VecEncoding::Sq8,
9651            },
9652        ) if v.len() == dim as usize => Some(Value::Sq8Vector(spg_storage::quantize::quantize(&v))),
9653        // v6.0.3: f32 → f16 INSERT-time conversion for HALF
9654        // columns. Bit-exact at the storage layer (modulo
9655        // half-precision rounding); no rerank pass needed at
9656        // search time.
9657        (
9658            Value::Vector(v),
9659            DataType::Vector {
9660                dim,
9661                encoding: VecEncoding::F16,
9662            },
9663        ) if v.len() == dim as usize => Some(Value::HalfVector(
9664            spg_storage::halfvec::HalfVector::from_f32_slice(&v),
9665        )),
9666        // CHAR(n) right-pads with U+0020 to exactly n chars; if the input
9667        // is already longer we reject (PG truncates trailing-space-only;
9668        // staying strict for v1).
9669        (Value::Text(s), DataType::Char(size)) => {
9670            let len = u32::try_from(s.chars().count()).unwrap_or(u32::MAX);
9671            if len > size {
9672                return Err(EngineError::Unsupported(alloc::format!(
9673                    "value for CHAR({size}) column `{col_name}` exceeds length: \
9674                     {len} chars"
9675                )));
9676            }
9677            let need = (size - len) as usize;
9678            let mut padded = s;
9679            padded.reserve(need);
9680            for _ in 0..need {
9681                padded.push(' ');
9682            }
9683            Some(Value::Text(padded))
9684        }
9685        _ => None,
9686    };
9687    coerced.ok_or(EngineError::Storage(StorageError::TypeMismatch {
9688        column: col_name.into(),
9689        expected,
9690        actual,
9691        position,
9692    }))
9693}
9694
9695#[cfg(test)]
9696mod tests {
9697    use super::*;
9698    use alloc::vec;
9699
9700    fn unwrap_command_ok(r: &QueryResult) -> usize {
9701        match r {
9702            QueryResult::CommandOk { affected, .. } => *affected,
9703            QueryResult::Rows { .. } => panic!("expected CommandOk, got Rows"),
9704        }
9705    }
9706
9707    #[test]
9708    fn create_table_registers_schema() {
9709        let mut e = Engine::new();
9710        e.execute("CREATE TABLE foo (a INT NOT NULL, b TEXT)")
9711            .unwrap();
9712        assert_eq!(e.catalog().table_count(), 1);
9713        let t = e.catalog().get("foo").unwrap();
9714        assert_eq!(t.schema().columns.len(), 2);
9715        assert_eq!(t.schema().columns[0].ty, DataType::Int);
9716        assert!(!t.schema().columns[0].nullable);
9717        assert_eq!(t.schema().columns[1].ty, DataType::Text);
9718    }
9719
9720    #[test]
9721    fn create_table_vector_default_is_f32_encoded() {
9722        let mut e = Engine::new();
9723        e.execute("CREATE TABLE t (v VECTOR(8))").unwrap();
9724        let t = e.catalog().get("t").unwrap();
9725        assert_eq!(
9726            t.schema().columns[0].ty,
9727            DataType::Vector {
9728                dim: 8,
9729                encoding: VecEncoding::F32,
9730            },
9731        );
9732    }
9733
9734    #[test]
9735    fn create_table_vector_using_sq8_succeeds() {
9736        // v6.0.1 step 3: the step-1 fence in `column_def_to_schema`
9737        // is lifted. CREATE TABLE persists an SQ8 column type in
9738        // the catalog; INSERT (next test) quantises raw f32 input.
9739        let mut e = Engine::new();
9740        e.execute("CREATE TABLE t (v VECTOR(8) USING SQ8)").unwrap();
9741        let t = e.catalog().get("t").unwrap();
9742        assert_eq!(
9743            t.schema().columns[0].ty,
9744            DataType::Vector {
9745                dim: 8,
9746                encoding: VecEncoding::Sq8,
9747            },
9748        );
9749    }
9750
9751    #[test]
9752    fn insert_into_sq8_column_quantises_f32_payload() {
9753        // v6.0.1 step 3: INSERT-time `coerce_value` rewrites a raw
9754        // `Value::Vector(Vec<f32>)` literal into the column's
9755        // quantised representation. The row that lands in the
9756        // catalog must therefore hold a `Value::Sq8Vector`, not the
9757        // original f32 buffer — that's the bit that delivers the
9758        // 4× compression target.
9759        let mut e = Engine::new();
9760        e.execute("CREATE TABLE t (v VECTOR(4) USING SQ8)").unwrap();
9761        e.execute("INSERT INTO t VALUES ([0.0, 0.25, 0.5, 1.0])")
9762            .unwrap();
9763        let t = e.catalog().get("t").unwrap();
9764        assert_eq!(t.rows().len(), 1);
9765        match &t.rows()[0].values[0] {
9766            Value::Sq8Vector(q) => {
9767                assert_eq!(q.bytes.len(), 4);
9768                // min/max are derived from the payload: min=0.0, max=1.0.
9769                assert!((q.min - 0.0).abs() < 1e-6);
9770                assert!((q.max - 1.0).abs() < 1e-6);
9771            }
9772            other => panic!("expected Sq8Vector cell, got {other:?}"),
9773        }
9774    }
9775
9776    #[test]
9777    fn create_table_vector_using_half_succeeds_and_insert_converts_to_f16() {
9778        // v6.0.3: CREATE TABLE accepts USING HALF; INSERT path
9779        // converts the incoming `Value::Vector(Vec<f32>)` cell
9780        // into `Value::HalfVector(HalfVector)` via the new
9781        // `coerce_value` arm. The dequantised round-trip is
9782        // bit-exact for f16-representable values, so 0.0 / 0.25
9783        // / 0.5 / 1.0 hit their grid points exactly.
9784        let mut e = Engine::new();
9785        e.execute("CREATE TABLE t (v VECTOR(4) USING HALF)")
9786            .unwrap();
9787        e.execute("INSERT INTO t VALUES ([0.0, 0.25, 0.5, 1.0])")
9788            .unwrap();
9789        let t = e.catalog().get("t").unwrap();
9790        assert_eq!(t.rows().len(), 1);
9791        match &t.rows()[0].values[0] {
9792            Value::HalfVector(h) => {
9793                assert_eq!(h.dim(), 4);
9794                let back = h.to_f32_vec();
9795                let expected = alloc::vec![0.0_f32, 0.25, 0.5, 1.0];
9796                for (g, e) in back.iter().zip(expected.iter()) {
9797                    assert!(
9798                        (g - e).abs() < 1e-6,
9799                        "{g} vs {e} should be exact on f16 grid"
9800                    );
9801                }
9802            }
9803            other => panic!("expected HalfVector cell, got {other:?}"),
9804        }
9805    }
9806
9807    #[test]
9808    fn alter_index_rebuild_in_place_succeeds() {
9809        // v6.0.4: bare REBUILD (no encoding switch) walks every
9810        // row again to rebuild the NSW graph. Verifies the engine
9811        // dispatch + storage helper plumbing without changing any
9812        // cell encoding.
9813        let mut e = Engine::new();
9814        e.execute("CREATE TABLE t (id INT NOT NULL, v VECTOR(3) NOT NULL)")
9815            .unwrap();
9816        for i in 0..8_i32 {
9817            #[allow(clippy::cast_precision_loss)]
9818            let base = (i as f32) * 0.1;
9819            e.execute(&alloc::format!(
9820                "INSERT INTO t VALUES ({i}, [{base}, {b1}, {b2}])",
9821                b1 = base + 0.01,
9822                b2 = base + 0.02,
9823            ))
9824            .unwrap();
9825        }
9826        e.execute("CREATE INDEX t_idx ON t USING hnsw (v)").unwrap();
9827        e.execute("ALTER INDEX t_idx REBUILD").unwrap();
9828        // Schema encoding stays F32 (no encoding clause).
9829        assert_eq!(
9830            e.catalog().get("t").unwrap().schema().columns[1].ty,
9831            DataType::Vector {
9832                dim: 3,
9833                encoding: VecEncoding::F32,
9834            },
9835        );
9836    }
9837
9838    #[test]
9839    fn alter_index_rebuild_with_encoding_switches_cell_type() {
9840        // v6.0.4: REBUILD WITH (encoding = SQ8) recodes every
9841        // stored cell from F32 → SQ8 + rebuilds the graph atop the
9842        // new encoding. Post-rebuild, cells must be Sq8Vector and
9843        // the schema must report encoding = Sq8.
9844        let mut e = Engine::new();
9845        e.execute("CREATE TABLE t (id INT NOT NULL, v VECTOR(4) NOT NULL)")
9846            .unwrap();
9847        e.execute("INSERT INTO t VALUES (1, [0.0, 0.25, 0.5, 1.0])")
9848            .unwrap();
9849        e.execute("CREATE INDEX t_idx ON t USING hnsw (v)").unwrap();
9850        e.execute("ALTER INDEX t_idx REBUILD WITH (encoding = SQ8)")
9851            .unwrap();
9852        let t = e.catalog().get("t").unwrap();
9853        assert_eq!(
9854            t.schema().columns[1].ty,
9855            DataType::Vector {
9856                dim: 4,
9857                encoding: VecEncoding::Sq8,
9858            },
9859        );
9860        assert!(matches!(t.rows()[0].values[1], Value::Sq8Vector(_)));
9861    }
9862
9863    #[test]
9864    fn alter_index_rebuild_unknown_index_errors() {
9865        let mut e = Engine::new();
9866        let err = e.execute("ALTER INDEX nope REBUILD").unwrap_err();
9867        assert!(
9868            matches!(
9869                &err,
9870                EngineError::Storage(StorageError::IndexNotFound { name }) if name == "nope"
9871            ),
9872            "got: {err}"
9873        );
9874    }
9875
9876    #[test]
9877    fn alter_index_rebuild_on_btree_index_errors() {
9878        // REBUILD on a B-tree index has no semantic meaning in
9879        // v6.0.4 — rejected at the storage layer with `Unsupported`.
9880        let mut e = Engine::new();
9881        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
9882        e.execute("INSERT INTO t VALUES (1)").unwrap();
9883        e.execute("CREATE INDEX t_idx ON t (id)").unwrap();
9884        let err = e.execute("ALTER INDEX t_idx REBUILD").unwrap_err();
9885        assert!(
9886            matches!(&err, EngineError::Storage(StorageError::Unsupported(_))),
9887            "got: {err}"
9888        );
9889    }
9890
9891    #[test]
9892    fn prepared_insert_substitutes_placeholders() {
9893        // v6.1.1: prepare() parses once; execute_prepared() walks the
9894        // AST and replaces $1/$2 with the param Values BEFORE the
9895        // dispatch sees them. Same logical result as a simple-query
9896        // INSERT, but parse happens once per *statement*, not per
9897        // execution.
9898        let mut e = Engine::new();
9899        e.execute("CREATE TABLE t (id INT NOT NULL, name TEXT NOT NULL)")
9900            .unwrap();
9901        let stmt = e.prepare("INSERT INTO t VALUES ($1, $2)").unwrap();
9902        for (id, name) in [(1, "alice"), (2, "bob"), (3, "carol")] {
9903            e.execute_prepared(stmt.clone(), &[Value::Int(id), Value::Text(name.into())])
9904                .unwrap();
9905        }
9906        // Read back via simple-query SELECT.
9907        let rows_result = e.execute("SELECT id, name FROM t").unwrap();
9908        let QueryResult::Rows { rows, .. } = rows_result else {
9909            panic!("expected Rows")
9910        };
9911        assert_eq!(rows.len(), 3);
9912    }
9913
9914    #[test]
9915    fn prepared_select_with_placeholder_filters_rows() {
9916        let mut e = Engine::new();
9917        e.execute("CREATE TABLE t (id INT NOT NULL, v INT NOT NULL)")
9918            .unwrap();
9919        for i in 0..10_i32 {
9920            e.execute(&alloc::format!("INSERT INTO t VALUES ({i}, {})", i * 7))
9921                .unwrap();
9922        }
9923        let stmt = e.prepare("SELECT id FROM t WHERE v = $1").unwrap();
9924        let QueryResult::Rows { rows, .. } = e.execute_prepared(stmt, &[Value::Int(35)]).unwrap()
9925        else {
9926            panic!("expected Rows")
9927        };
9928        // v = 35 means i*7 = 35 → i = 5.
9929        assert_eq!(rows.len(), 1);
9930        assert_eq!(rows[0].values[0], Value::Int(5));
9931    }
9932
9933    #[test]
9934    fn prepared_too_few_params_errors() {
9935        let mut e = Engine::new();
9936        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
9937        let stmt = e.prepare("INSERT INTO t VALUES ($1)").unwrap();
9938        let err = e.execute_prepared(stmt, &[]).unwrap_err();
9939        assert!(
9940            matches!(
9941                &err,
9942                EngineError::Eval(EvalError::PlaceholderOutOfRange { n: 1, bound: 0 })
9943            ),
9944            "got: {err}"
9945        );
9946    }
9947
9948    #[test]
9949    fn insert_into_half_column_dim_mismatch_errors() {
9950        let mut e = Engine::new();
9951        e.execute("CREATE TABLE t (v VECTOR(4) USING HALF)")
9952            .unwrap();
9953        let err = e.execute("INSERT INTO t VALUES ([1.0, 2.0])").unwrap_err();
9954        assert!(matches!(
9955            &err,
9956            EngineError::Storage(StorageError::TypeMismatch { .. })
9957        ));
9958    }
9959
9960    #[test]
9961    fn insert_into_sq8_column_dim_mismatch_errors() {
9962        // Dim mismatch falls through the `coerce_value` Vector→Sq8
9963        // arm's guard and surfaces as `TypeMismatch` — the same
9964        // error the F32 path produces today, so client error
9965        // handling stays uniform across encodings.
9966        let mut e = Engine::new();
9967        e.execute("CREATE TABLE t (v VECTOR(4) USING SQ8)").unwrap();
9968        let err = e.execute("INSERT INTO t VALUES ([1.0, 2.0])").unwrap_err();
9969        assert!(
9970            matches!(
9971                &err,
9972                EngineError::Storage(StorageError::TypeMismatch { .. })
9973            ),
9974            "got: {err}",
9975        );
9976    }
9977
9978    #[test]
9979    fn create_table_duplicate_errors() {
9980        let mut e = Engine::new();
9981        e.execute("CREATE TABLE foo (a INT)").unwrap();
9982        let err = e.execute("CREATE TABLE foo (a INT)").unwrap_err();
9983        assert!(matches!(
9984            err,
9985            EngineError::Storage(StorageError::DuplicateTable { ref name }) if name == "foo"
9986        ));
9987    }
9988
9989    #[test]
9990    fn insert_into_unknown_table_errors() {
9991        let mut e = Engine::new();
9992        let err = e.execute("INSERT INTO ghost VALUES (1)").unwrap_err();
9993        assert!(matches!(
9994            err,
9995            EngineError::Storage(StorageError::TableNotFound { ref name }) if name == "ghost"
9996        ));
9997    }
9998
9999    #[test]
10000    fn insert_happy_path_reports_one_affected() {
10001        let mut e = Engine::new();
10002        e.execute("CREATE TABLE foo (a INT NOT NULL)").unwrap();
10003        let r = e.execute("INSERT INTO foo VALUES (42)").unwrap();
10004        assert_eq!(unwrap_command_ok(&r), 1);
10005        assert_eq!(e.catalog().get("foo").unwrap().row_count(), 1);
10006    }
10007
10008    #[test]
10009    fn insert_arity_mismatch_propagates() {
10010        let mut e = Engine::new();
10011        e.execute("CREATE TABLE foo (a INT, b TEXT)").unwrap();
10012        let err = e.execute("INSERT INTO foo VALUES (1)").unwrap_err();
10013        assert!(matches!(
10014            err,
10015            EngineError::Storage(StorageError::ArityMismatch { .. })
10016        ));
10017    }
10018
10019    #[test]
10020    fn insert_negative_integer_via_unary_minus() {
10021        let mut e = Engine::new();
10022        e.execute("CREATE TABLE foo (a INT NOT NULL)").unwrap();
10023        e.execute("INSERT INTO foo VALUES (-7)").unwrap();
10024        let rows = e.catalog().get("foo").unwrap().rows();
10025        assert_eq!(rows[0].values[0], Value::Int(-7));
10026    }
10027
10028    #[test]
10029    fn insert_non_literal_expr_unsupported() {
10030        let mut e = Engine::new();
10031        e.execute("CREATE TABLE foo (a INT NOT NULL)").unwrap();
10032        let err = e.execute("INSERT INTO foo VALUES (1 + 2)").unwrap_err();
10033        assert!(matches!(err, EngineError::Unsupported(_)));
10034    }
10035
10036    #[test]
10037    fn select_star_returns_all_rows_in_insertion_order() {
10038        let mut e = Engine::new();
10039        e.execute("CREATE TABLE foo (a INT NOT NULL, b TEXT NOT NULL)")
10040            .unwrap();
10041        e.execute("INSERT INTO foo VALUES (1, 'one')").unwrap();
10042        e.execute("INSERT INTO foo VALUES (2, 'two')").unwrap();
10043        e.execute("INSERT INTO foo VALUES (3, 'three')").unwrap();
10044
10045        let r = e.execute("SELECT * FROM foo").unwrap();
10046        let QueryResult::Rows { columns, rows } = r else {
10047            panic!("expected Rows")
10048        };
10049        assert_eq!(columns.len(), 2);
10050        assert_eq!(columns[0].name, "a");
10051        assert_eq!(rows.len(), 3);
10052        assert_eq!(
10053            rows[1].values,
10054            vec![Value::Int(2), Value::Text("two".into())]
10055        );
10056    }
10057
10058    #[test]
10059    fn select_star_on_empty_table_returns_zero_rows() {
10060        let mut e = Engine::new();
10061        e.execute("CREATE TABLE foo (a INT)").unwrap();
10062        let r = e.execute("SELECT * FROM foo").unwrap();
10063        match r {
10064            QueryResult::Rows { rows, .. } => assert!(rows.is_empty()),
10065            QueryResult::CommandOk { .. } => panic!("expected Rows"),
10066        }
10067    }
10068
10069    // --- v0.4: WHERE + projection ------------------------------------------
10070
10071    fn make_three_row_users(e: &mut Engine) {
10072        e.execute("CREATE TABLE users (id INT NOT NULL, name TEXT NOT NULL, score INT)")
10073            .unwrap();
10074        e.execute("INSERT INTO users VALUES (1, 'alice', 90)")
10075            .unwrap();
10076        e.execute("INSERT INTO users VALUES (2, 'bob', NULL)")
10077            .unwrap();
10078        e.execute("INSERT INTO users VALUES (3, 'cara', 70)")
10079            .unwrap();
10080    }
10081
10082    fn unwrap_rows(r: QueryResult) -> (Vec<ColumnSchema>, Vec<Row>) {
10083        match r {
10084            QueryResult::Rows { columns, rows } => (columns, rows),
10085            QueryResult::CommandOk { .. } => panic!("expected Rows"),
10086        }
10087    }
10088
10089    #[test]
10090    fn where_filter_passes_only_true_rows() {
10091        let mut e = Engine::new();
10092        make_three_row_users(&mut e);
10093        let r = e.execute("SELECT * FROM users WHERE id > 1").unwrap();
10094        let (_, rows) = unwrap_rows(r);
10095        assert_eq!(rows.len(), 2);
10096        assert_eq!(rows[0].values[0], Value::Int(2));
10097        assert_eq!(rows[1].values[0], Value::Int(3));
10098    }
10099
10100    #[test]
10101    fn where_with_null_result_filters_out_row() {
10102        let mut e = Engine::new();
10103        make_three_row_users(&mut e);
10104        // score is NULL for bob → score > 80 is NULL → row excluded
10105        let r = e.execute("SELECT * FROM users WHERE score > 80").unwrap();
10106        let (_, rows) = unwrap_rows(r);
10107        assert_eq!(rows.len(), 1);
10108        assert_eq!(rows[0].values[1], Value::Text("alice".into()));
10109    }
10110
10111    #[test]
10112    fn projection_named_columns() {
10113        let mut e = Engine::new();
10114        make_three_row_users(&mut e);
10115        let r = e.execute("SELECT name, score FROM users").unwrap();
10116        let (cols, rows) = unwrap_rows(r);
10117        assert_eq!(cols.len(), 2);
10118        assert_eq!(cols[0].name, "name");
10119        assert_eq!(cols[1].name, "score");
10120        assert_eq!(rows.len(), 3);
10121        assert_eq!(
10122            rows[0].values,
10123            vec![Value::Text("alice".into()), Value::Int(90)]
10124        );
10125    }
10126
10127    #[test]
10128    fn projection_with_column_alias() {
10129        let mut e = Engine::new();
10130        make_three_row_users(&mut e);
10131        let r = e
10132            .execute("SELECT name AS who FROM users WHERE id = 1")
10133            .unwrap();
10134        let (cols, rows) = unwrap_rows(r);
10135        assert_eq!(cols[0].name, "who");
10136        assert_eq!(rows.len(), 1);
10137        assert_eq!(rows[0].values[0], Value::Text("alice".into()));
10138    }
10139
10140    #[test]
10141    fn qualified_column_with_table_alias_resolves() {
10142        let mut e = Engine::new();
10143        make_three_row_users(&mut e);
10144        let r = e
10145            .execute("SELECT u.id, u.name FROM users AS u WHERE u.id < 3")
10146            .unwrap();
10147        let (cols, rows) = unwrap_rows(r);
10148        assert_eq!(cols.len(), 2);
10149        assert_eq!(rows.len(), 2);
10150    }
10151
10152    #[test]
10153    fn qualified_column_with_wrong_alias_errors() {
10154        let mut e = Engine::new();
10155        make_three_row_users(&mut e);
10156        let err = e.execute("SELECT x.id FROM users AS u").unwrap_err();
10157        assert!(matches!(
10158            err,
10159            EngineError::Eval(EvalError::UnknownQualifier { ref qualifier }) if qualifier == "x"
10160        ));
10161    }
10162
10163    #[test]
10164    fn select_unknown_column_errors_in_projection() {
10165        let mut e = Engine::new();
10166        make_three_row_users(&mut e);
10167        let err = e.execute("SELECT ghost FROM users").unwrap_err();
10168        assert!(matches!(
10169            err,
10170            EngineError::Eval(EvalError::ColumnNotFound { ref name }) if name == "ghost"
10171        ));
10172    }
10173
10174    #[test]
10175    fn where_unknown_column_errors() {
10176        let mut e = Engine::new();
10177        make_three_row_users(&mut e);
10178        let err = e
10179            .execute("SELECT * FROM users WHERE ghost = 1")
10180            .unwrap_err();
10181        assert!(matches!(
10182            err,
10183            EngineError::Eval(EvalError::ColumnNotFound { .. })
10184        ));
10185    }
10186
10187    #[test]
10188    fn expression_projection_evaluates_and_renders() {
10189        // Compound expressions in the SELECT list are evaluated per row;
10190        // the output column is typed TEXT, name defaults to the expression.
10191        let mut e = Engine::new();
10192        e.execute("CREATE TABLE t (a INT NOT NULL)").unwrap();
10193        e.execute("INSERT INTO t VALUES (3)").unwrap();
10194        let (_, rows) = unwrap_rows(e.execute("SELECT 1 + 2 FROM t").unwrap());
10195        assert_eq!(rows.len(), 1);
10196        // The expression evaluates to integer 3; rendered as the cell value
10197        // (storage::Value::Int(3) since arithmetic kept ints).
10198        assert_eq!(rows[0].values[0], Value::Int(3));
10199    }
10200
10201    #[test]
10202    fn select_unknown_table_errors() {
10203        let mut e = Engine::new();
10204        let err = e.execute("SELECT * FROM ghost").unwrap_err();
10205        assert!(matches!(
10206            err,
10207            EngineError::Storage(StorageError::TableNotFound { .. })
10208        ));
10209    }
10210
10211    #[test]
10212    fn invalid_sql_returns_parse_error() {
10213        // v4.4: UPDATE is now real SQL, so use a true syntactic
10214        // garbage payload for the parse-error path.
10215        let mut e = Engine::new();
10216        let err = e.execute("THIS_IS_NOT_A_KEYWORD foo bar baz").unwrap_err();
10217        assert!(matches!(err, EngineError::Parse(_)));
10218    }
10219
10220    // --- v0.8 CREATE INDEX + index seek ------------------------------------
10221
10222    #[test]
10223    fn create_index_registers_on_table() {
10224        let mut e = Engine::new();
10225        make_three_row_users(&mut e);
10226        e.execute("CREATE INDEX by_name ON users (name)").unwrap();
10227        let t = e.catalog().get("users").unwrap();
10228        assert_eq!(t.indices().len(), 1);
10229        assert_eq!(t.indices()[0].name, "by_name");
10230    }
10231
10232    #[test]
10233    fn create_index_on_unknown_table_errors() {
10234        let mut e = Engine::new();
10235        let err = e.execute("CREATE INDEX i ON ghost (a)").unwrap_err();
10236        assert!(matches!(
10237            err,
10238            EngineError::Storage(StorageError::TableNotFound { .. })
10239        ));
10240    }
10241
10242    #[test]
10243    fn create_index_on_unknown_column_errors() {
10244        let mut e = Engine::new();
10245        make_three_row_users(&mut e);
10246        let err = e.execute("CREATE INDEX i ON users (ghost)").unwrap_err();
10247        assert!(matches!(
10248            err,
10249            EngineError::Storage(StorageError::ColumnNotFound { .. })
10250        ));
10251    }
10252
10253    #[test]
10254    fn select_eq_uses_index_returns_same_rows_as_scan() {
10255        // Build two engines: one with an index, one without. Same query →
10256        // same row set (index is a planner optimisation, not a semantic
10257        // change).
10258        let mut without = Engine::new();
10259        make_three_row_users(&mut without);
10260        let mut with = Engine::new();
10261        make_three_row_users(&mut with);
10262        with.execute("CREATE INDEX by_id ON users (id)").unwrap();
10263
10264        let q = "SELECT * FROM users WHERE id = 2";
10265        let (_, no_idx_rows) = unwrap_rows(without.execute(q).unwrap());
10266        let (_, idx_rows) = unwrap_rows(with.execute(q).unwrap());
10267        assert_eq!(no_idx_rows, idx_rows);
10268        assert_eq!(idx_rows.len(), 1);
10269    }
10270
10271    #[test]
10272    fn select_eq_with_no_matching_index_value_returns_empty() {
10273        let mut e = Engine::new();
10274        make_three_row_users(&mut e);
10275        e.execute("CREATE INDEX by_id ON users (id)").unwrap();
10276        let (_, rows) = unwrap_rows(e.execute("SELECT * FROM users WHERE id = 999").unwrap());
10277        assert_eq!(rows.len(), 0);
10278    }
10279
10280    // --- v0.9 transactions -------------------------------------------------
10281
10282    #[test]
10283    fn begin_sets_in_transaction_flag() {
10284        let mut e = Engine::new();
10285        assert!(!e.in_transaction());
10286        e.execute("BEGIN").unwrap();
10287        assert!(e.in_transaction());
10288    }
10289
10290    #[test]
10291    fn double_begin_errors() {
10292        let mut e = Engine::new();
10293        e.execute("BEGIN").unwrap();
10294        let err = e.execute("BEGIN").unwrap_err();
10295        assert_eq!(err, EngineError::TransactionAlreadyOpen);
10296    }
10297
10298    #[test]
10299    fn commit_without_begin_errors() {
10300        let mut e = Engine::new();
10301        let err = e.execute("COMMIT").unwrap_err();
10302        assert_eq!(err, EngineError::NoActiveTransaction);
10303    }
10304
10305    #[test]
10306    fn rollback_without_begin_errors() {
10307        let mut e = Engine::new();
10308        let err = e.execute("ROLLBACK").unwrap_err();
10309        assert_eq!(err, EngineError::NoActiveTransaction);
10310    }
10311
10312    #[test]
10313    fn commit_applies_shadow_to_committed_catalog() {
10314        let mut e = Engine::new();
10315        e.execute("CREATE TABLE t (v INT NOT NULL)").unwrap();
10316        e.execute("BEGIN").unwrap();
10317        e.execute("INSERT INTO t VALUES (1)").unwrap();
10318        e.execute("INSERT INTO t VALUES (2)").unwrap();
10319        e.execute("COMMIT").unwrap();
10320        assert!(!e.in_transaction());
10321        assert_eq!(e.catalog().get("t").unwrap().row_count(), 2);
10322    }
10323
10324    #[test]
10325    fn rollback_discards_shadow() {
10326        let mut e = Engine::new();
10327        e.execute("CREATE TABLE t (v INT NOT NULL)").unwrap();
10328        e.execute("BEGIN").unwrap();
10329        e.execute("INSERT INTO t VALUES (1)").unwrap();
10330        e.execute("INSERT INTO t VALUES (2)").unwrap();
10331        e.execute("ROLLBACK").unwrap();
10332        assert!(!e.in_transaction());
10333        assert_eq!(e.catalog().get("t").unwrap().row_count(), 0);
10334    }
10335
10336    #[test]
10337    fn select_during_tx_sees_uncommitted_writes_own_session() {
10338        // The shadow catalog is read by SELECTs while a TX is open — the
10339        // session can see its own pending writes.
10340        let mut e = Engine::new();
10341        e.execute("CREATE TABLE t (v INT NOT NULL)").unwrap();
10342        e.execute("BEGIN").unwrap();
10343        e.execute("INSERT INTO t VALUES (42)").unwrap();
10344        let (_, rows) = unwrap_rows(e.execute("SELECT * FROM t").unwrap());
10345        assert_eq!(rows.len(), 1);
10346        assert_eq!(rows[0].values[0], Value::Int(42));
10347    }
10348
10349    #[test]
10350    fn snapshot_with_no_users_is_bare_catalog_format() {
10351        let mut e = Engine::new();
10352        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
10353        let bytes = e.snapshot();
10354        assert_eq!(
10355            &bytes[..8],
10356            b"SPGDB001",
10357            "must be the bare v3.x catalog magic"
10358        );
10359        let e2 = Engine::restore_envelope(&bytes).unwrap();
10360        assert!(e2.users().is_empty());
10361        assert_eq!(e2.catalog().table_count(), 1);
10362    }
10363
10364    #[test]
10365    fn snapshot_with_users_round_trips_both_via_envelope() {
10366        let mut e = Engine::new();
10367        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
10368        e.create_user("alice", "pw1", Role::Admin, [9; 16]).unwrap();
10369        e.create_user("bob", "pw2", Role::ReadOnly, [5; 16])
10370            .unwrap();
10371        let bytes = e.snapshot();
10372        assert_eq!(&bytes[..8], b"SPGENV01", "must be the v4.1 envelope magic");
10373        let e2 = Engine::restore_envelope(&bytes).unwrap();
10374        assert_eq!(e2.users().len(), 2);
10375        assert_eq!(e2.verify_user("alice", "pw1"), Some(Role::Admin));
10376        assert_eq!(e2.verify_user("bob", "pw2"), Some(Role::ReadOnly));
10377        assert_eq!(e2.verify_user("alice", "wrong"), None);
10378        assert_eq!(e2.catalog().table_count(), 1);
10379    }
10380
10381    #[test]
10382    fn ddl_inside_tx_also_rolled_back() {
10383        let mut e = Engine::new();
10384        e.execute("BEGIN").unwrap();
10385        e.execute("CREATE TABLE t (v INT)").unwrap();
10386        // Visible inside the TX.
10387        e.execute("SELECT * FROM t").unwrap();
10388        e.execute("ROLLBACK").unwrap();
10389        // Gone after rollback.
10390        let err = e.execute("SELECT * FROM t").unwrap_err();
10391        assert!(matches!(
10392            err,
10393            EngineError::Storage(StorageError::TableNotFound { .. })
10394        ));
10395    }
10396
10397    // ── v6.1.2: CREATE / DROP PUBLICATION (engine-side) ──────
10398
10399    #[test]
10400    fn create_publication_lands_in_catalog() {
10401        let mut e = Engine::new();
10402        assert!(e.publications().is_empty());
10403        e.execute("CREATE PUBLICATION pub_a").unwrap();
10404        assert_eq!(e.publications().len(), 1);
10405        assert!(e.publications().contains("pub_a"));
10406    }
10407
10408    #[test]
10409    fn create_publication_duplicate_errors() {
10410        let mut e = Engine::new();
10411        e.execute("CREATE PUBLICATION pub_a").unwrap();
10412        let err = e.execute("CREATE PUBLICATION pub_a").unwrap_err();
10413        assert!(
10414            alloc::format!("{err:?}").contains("DuplicateName"),
10415            "got {err:?}"
10416        );
10417    }
10418
10419    #[test]
10420    fn drop_publication_silent_when_absent() {
10421        let mut e = Engine::new();
10422        // PG-compatible: DROP a publication that doesn't exist
10423        // succeeds (no-op) but reports zero affected.
10424        let r = e.execute("DROP PUBLICATION nope").unwrap();
10425        match r {
10426            QueryResult::CommandOk { affected, .. } => assert_eq!(affected, 0),
10427            other => panic!("expected CommandOk, got {other:?}"),
10428        }
10429    }
10430
10431    #[test]
10432    fn drop_publication_present_reports_one_affected() {
10433        let mut e = Engine::new();
10434        e.execute("CREATE PUBLICATION pub_a").unwrap();
10435        let r = e.execute("DROP PUBLICATION pub_a").unwrap();
10436        match r {
10437            QueryResult::CommandOk {
10438                affected,
10439                modified_catalog,
10440            } => {
10441                assert_eq!(affected, 1);
10442                assert!(modified_catalog);
10443            }
10444            other => panic!("expected CommandOk, got {other:?}"),
10445        }
10446        assert!(e.publications().is_empty());
10447    }
10448
10449    #[test]
10450    fn publications_persist_across_snapshot_restore() {
10451        // The persist-across-restart ship-gate at the engine layer —
10452        // snapshot → restore_envelope round trip must preserve the
10453        // publication catalog. The spg-server e2e covers the
10454        // process-restart variant.
10455        let mut e = Engine::new();
10456        e.execute("CREATE PUBLICATION pub_a").unwrap();
10457        e.execute("CREATE PUBLICATION pub_b FOR ALL TABLES")
10458            .unwrap();
10459        let snap = e.snapshot();
10460        let e2 = Engine::restore_envelope(&snap).unwrap();
10461        assert_eq!(e2.publications().len(), 2);
10462        assert!(e2.publications().contains("pub_a"));
10463        assert!(e2.publications().contains("pub_b"));
10464    }
10465
10466    #[test]
10467    fn create_publication_allowed_inside_transaction() {
10468        // v6.1.4 dropped the v6.1.2 in-TX guard — PG allows
10469        // CREATE PUBLICATION inside a TX and the auto-commit
10470        // wrap path needs the same allowance.
10471        let mut e = Engine::new();
10472        e.execute("BEGIN").unwrap();
10473        e.execute("CREATE PUBLICATION pub_a").unwrap();
10474        e.execute("COMMIT").unwrap();
10475        assert!(e.publications().contains("pub_a"));
10476    }
10477
10478    // ── v6.1.3: SHOW PUBLICATIONS + FOR-list variants ───────
10479
10480    #[test]
10481    fn create_publication_for_table_list_lands_with_scope() {
10482        let mut e = Engine::new();
10483        e.execute("CREATE TABLE t1 (id INT NOT NULL)").unwrap();
10484        e.execute("CREATE TABLE t2 (id INT NOT NULL)").unwrap();
10485        e.execute("CREATE PUBLICATION pub_a FOR TABLE t1, t2")
10486            .unwrap();
10487        let scope = e.publications().get("pub_a").cloned();
10488        let Some(spg_sql::ast::PublicationScope::ForTables(ts)) = scope else {
10489            panic!("expected ForTables scope, got {scope:?}")
10490        };
10491        assert_eq!(ts, alloc::vec!["t1".to_string(), "t2".to_string()]);
10492    }
10493
10494    #[test]
10495    fn create_publication_all_tables_except_lands_with_scope() {
10496        let mut e = Engine::new();
10497        e.execute("CREATE PUBLICATION pub_a FOR ALL TABLES EXCEPT t3")
10498            .unwrap();
10499        let scope = e.publications().get("pub_a").cloned();
10500        let Some(spg_sql::ast::PublicationScope::AllTablesExcept(ts)) = scope else {
10501            panic!("expected AllTablesExcept scope, got {scope:?}")
10502        };
10503        assert_eq!(ts, alloc::vec!["t3".to_string()]);
10504    }
10505
10506    #[test]
10507    fn show_publications_empty_returns_zero_rows() {
10508        let e = Engine::new();
10509        let r = e.execute_readonly("SHOW PUBLICATIONS").unwrap();
10510        let QueryResult::Rows { rows, columns } = r else {
10511            panic!()
10512        };
10513        assert!(rows.is_empty());
10514        assert_eq!(columns.len(), 3);
10515        assert_eq!(columns[0].name, "name");
10516        assert_eq!(columns[1].name, "scope");
10517        assert_eq!(columns[2].name, "table_count");
10518    }
10519
10520    #[test]
10521    fn show_publications_returns_one_row_per_publication_ordered_by_name() {
10522        let mut e = Engine::new();
10523        e.execute("CREATE PUBLICATION z_pub").unwrap();
10524        e.execute("CREATE PUBLICATION a_pub FOR TABLE t1, t2")
10525            .unwrap();
10526        e.execute("CREATE PUBLICATION m_pub FOR ALL TABLES EXCEPT bad")
10527            .unwrap();
10528        let r = e.execute_readonly("SHOW PUBLICATIONS").unwrap();
10529        let QueryResult::Rows { rows, .. } = r else {
10530            panic!()
10531        };
10532        assert_eq!(rows.len(), 3);
10533        // Alphabetical order: a_pub, m_pub, z_pub.
10534        let names: Vec<&str> = rows
10535            .iter()
10536            .map(|r| {
10537                if let Value::Text(s) = &r.values[0] {
10538                    s.as_str()
10539                } else {
10540                    panic!()
10541                }
10542            })
10543            .collect();
10544        assert_eq!(names, alloc::vec!["a_pub", "m_pub", "z_pub"]);
10545        // Row 0 — a_pub scope summary + table_count = 2.
10546        match &rows[0].values[1] {
10547            Value::Text(s) => assert_eq!(s, "FOR TABLE t1, t2"),
10548            other => panic!("expected Text, got {other:?}"),
10549        }
10550        assert_eq!(rows[0].values[2], Value::Int(2));
10551        // Row 1 — m_pub.
10552        match &rows[1].values[1] {
10553            Value::Text(s) => assert_eq!(s, "FOR ALL TABLES EXCEPT bad"),
10554            other => panic!("expected Text, got {other:?}"),
10555        }
10556        assert_eq!(rows[1].values[2], Value::Int(1));
10557        // Row 2 — z_pub (AllTables → NULL count).
10558        match &rows[2].values[1] {
10559            Value::Text(s) => assert_eq!(s, "FOR ALL TABLES"),
10560            other => panic!("expected Text, got {other:?}"),
10561        }
10562        assert_eq!(rows[2].values[2], Value::Null);
10563    }
10564
10565    #[test]
10566    fn for_list_scopes_persist_across_snapshot() {
10567        // The v6.1.2 envelope-v3 round-trip exercised AllTables;
10568        // v6.1.3 needs the scope-1 / scope-2 tags to survive too.
10569        let mut e = Engine::new();
10570        e.execute("CREATE PUBLICATION p1 FOR TABLE t1, t2").unwrap();
10571        e.execute("CREATE PUBLICATION p2 FOR ALL TABLES EXCEPT bad, worse")
10572            .unwrap();
10573        let snap = e.snapshot();
10574        let e2 = Engine::restore_envelope(&snap).unwrap();
10575        assert_eq!(e2.publications().len(), 2);
10576        let p1 = e2.publications().get("p1").cloned();
10577        let Some(spg_sql::ast::PublicationScope::ForTables(ts)) = p1 else {
10578            panic!("p1 scope lost: {p1:?}")
10579        };
10580        assert_eq!(ts, alloc::vec!["t1".to_string(), "t2".to_string()]);
10581        let p2 = e2.publications().get("p2").cloned();
10582        let Some(spg_sql::ast::PublicationScope::AllTablesExcept(ts)) = p2 else {
10583            panic!("p2 scope lost: {p2:?}")
10584        };
10585        assert_eq!(ts, alloc::vec!["bad".to_string(), "worse".to_string()]);
10586    }
10587
10588    // ── v6.1.4: CREATE / DROP SUBSCRIPTION + SHOW + envelope v4 ─
10589
10590    #[test]
10591    fn create_subscription_lands_in_catalog_with_defaults() {
10592        let mut e = Engine::new();
10593        e.execute(
10594            "CREATE SUBSCRIPTION sub_a CONNECTION 'host=127.0.0.1 port=20002' PUBLICATION pub_a",
10595        )
10596        .unwrap();
10597        let s = e.subscriptions().get("sub_a").cloned().expect("present");
10598        assert_eq!(s.conn_str, "host=127.0.0.1 port=20002");
10599        assert_eq!(s.publications, alloc::vec!["pub_a".to_string()]);
10600        assert!(s.enabled);
10601        assert_eq!(s.last_received_pos, 0);
10602    }
10603
10604    #[test]
10605    fn create_subscription_duplicate_name_errors() {
10606        let mut e = Engine::new();
10607        e.execute("CREATE SUBSCRIPTION s CONNECTION 'host=x' PUBLICATION p")
10608            .unwrap();
10609        let err = e
10610            .execute("CREATE SUBSCRIPTION s CONNECTION 'host=y' PUBLICATION p")
10611            .unwrap_err();
10612        assert!(
10613            alloc::format!("{err:?}").contains("DuplicateName"),
10614            "got {err:?}"
10615        );
10616    }
10617
10618    #[test]
10619    fn drop_subscription_silent_when_absent() {
10620        let mut e = Engine::new();
10621        let r = e.execute("DROP SUBSCRIPTION never").unwrap();
10622        match r {
10623            QueryResult::CommandOk { affected, .. } => assert_eq!(affected, 0),
10624            other => panic!("expected CommandOk, got {other:?}"),
10625        }
10626    }
10627
10628    #[test]
10629    fn subscription_advance_updates_last_pos_monotone() {
10630        let mut e = Engine::new();
10631        e.execute("CREATE SUBSCRIPTION s CONNECTION 'h=x' PUBLICATION p")
10632            .unwrap();
10633        assert!(e.subscription_advance("s", 100));
10634        assert_eq!(e.subscriptions().get("s").unwrap().last_received_pos, 100);
10635        assert!(e.subscription_advance("s", 50)); // stale → ignored
10636        assert_eq!(e.subscriptions().get("s").unwrap().last_received_pos, 100);
10637        assert!(e.subscription_advance("s", 200));
10638        assert_eq!(e.subscriptions().get("s").unwrap().last_received_pos, 200);
10639        assert!(!e.subscription_advance("missing", 1));
10640    }
10641
10642    #[test]
10643    fn show_subscriptions_returns_rows_ordered_by_name() {
10644        let mut e = Engine::new();
10645        e.execute("CREATE SUBSCRIPTION z_sub CONNECTION 'h=x' PUBLICATION p1, p2")
10646            .unwrap();
10647        e.execute("CREATE SUBSCRIPTION a_sub CONNECTION 'h=y' PUBLICATION p3")
10648            .unwrap();
10649        let r = e.execute_readonly("SHOW SUBSCRIPTIONS").unwrap();
10650        let QueryResult::Rows { rows, columns } = r else {
10651            panic!()
10652        };
10653        assert_eq!(rows.len(), 2);
10654        assert_eq!(columns.len(), 5);
10655        assert_eq!(columns[0].name, "name");
10656        assert_eq!(columns[4].name, "last_received_pos");
10657        // Alphabetical: a_sub, z_sub.
10658        let names: Vec<&str> = rows
10659            .iter()
10660            .map(|r| {
10661                if let Value::Text(s) = &r.values[0] {
10662                    s.as_str()
10663                } else {
10664                    panic!()
10665                }
10666            })
10667            .collect();
10668        assert_eq!(names, alloc::vec!["a_sub", "z_sub"]);
10669        // Row 0: a_sub
10670        assert_eq!(rows[0].values[1], Value::Text("h=y".to_string()));
10671        assert_eq!(rows[0].values[2], Value::Text("p3".to_string()));
10672        assert_eq!(rows[0].values[3], Value::Bool(true));
10673        assert_eq!(rows[0].values[4], Value::BigInt(0));
10674        // Row 1: z_sub — publications join with ", "
10675        assert_eq!(rows[1].values[2], Value::Text("p1, p2".to_string()));
10676    }
10677
10678    #[test]
10679    fn subscriptions_persist_across_snapshot_envelope_v4() {
10680        let mut e = Engine::new();
10681        e.execute("CREATE SUBSCRIPTION s1 CONNECTION 'h=A' PUBLICATION p1, p2")
10682            .unwrap();
10683        e.execute("CREATE SUBSCRIPTION s2 CONNECTION 'h=B' PUBLICATION p3")
10684            .unwrap();
10685        e.subscription_advance("s2", 42);
10686        let snap = e.snapshot();
10687        let e2 = Engine::restore_envelope(&snap).unwrap();
10688        assert_eq!(e2.subscriptions().len(), 2);
10689        let s1 = e2.subscriptions().get("s1").unwrap();
10690        assert_eq!(s1.conn_str, "h=A");
10691        assert_eq!(
10692            s1.publications,
10693            alloc::vec!["p1".to_string(), "p2".to_string()]
10694        );
10695        assert_eq!(s1.last_received_pos, 0);
10696        let s2 = e2.subscriptions().get("s2").unwrap();
10697        assert_eq!(s2.last_received_pos, 42);
10698    }
10699
10700    #[test]
10701    fn v3_envelope_loads_with_empty_subscriptions() {
10702        // v3 snapshot (publications-only). Forge it by hand so we
10703        // verify v6.1.4 readers don't panic — they must surface
10704        // empty subscriptions and a populated publication table.
10705        let mut e = Engine::new();
10706        e.execute("CREATE PUBLICATION pub_legacy").unwrap();
10707        let catalog = e.catalog.serialize();
10708        let users = crate::users::serialize_users(&e.users);
10709        let pubs = e.publications.serialize();
10710        let mut buf = Vec::new();
10711        buf.extend_from_slice(b"SPGENV01");
10712        buf.push(3u8); // v3
10713        buf.extend_from_slice(&u32::try_from(catalog.len()).unwrap().to_le_bytes());
10714        buf.extend_from_slice(&catalog);
10715        buf.extend_from_slice(&u32::try_from(users.len()).unwrap().to_le_bytes());
10716        buf.extend_from_slice(&users);
10717        buf.extend_from_slice(&u32::try_from(pubs.len()).unwrap().to_le_bytes());
10718        buf.extend_from_slice(&pubs);
10719        let crc = spg_crypto::crc32::crc32(&buf);
10720        buf.extend_from_slice(&crc.to_le_bytes());
10721
10722        let e2 = Engine::restore_envelope(&buf).expect("v3 envelope restores under v4 reader");
10723        assert!(e2.subscriptions().is_empty());
10724        assert!(e2.publications().contains("pub_legacy"));
10725    }
10726
10727    #[test]
10728    fn create_subscription_allowed_inside_transaction() {
10729        let mut e = Engine::new();
10730        e.execute("BEGIN").unwrap();
10731        e.execute("CREATE SUBSCRIPTION s CONNECTION 'h=x' PUBLICATION p")
10732            .unwrap();
10733        e.execute("COMMIT").unwrap();
10734        assert!(e.subscriptions().contains("s"));
10735    }
10736
10737    // ── v6.2.0: ANALYZE + spg_statistic + envelope v5 ──────────
10738    #[test]
10739    fn analyze_populates_histogram_bounds() {
10740        let mut e = Engine::new();
10741        e.execute("CREATE TABLE t (id INT NOT NULL, name TEXT)")
10742            .unwrap();
10743        for i in 0..50 {
10744            e.execute(&alloc::format!("INSERT INTO t VALUES ({i}, 'name{i}')"))
10745                .unwrap();
10746        }
10747        e.execute("ANALYZE t").unwrap();
10748        let stats = e.statistics();
10749        let id_stats = stats.get("t", "id").unwrap();
10750        assert!(id_stats.histogram_bounds.len() >= 2);
10751        assert_eq!(id_stats.histogram_bounds.first().unwrap(), "0");
10752        assert_eq!(id_stats.histogram_bounds.last().unwrap(), "49");
10753        assert!((id_stats.null_frac - 0.0).abs() < 1e-6);
10754        assert_eq!(id_stats.n_distinct, 50);
10755    }
10756
10757    #[test]
10758    fn reanalyze_overwrites_prior_stats() {
10759        let mut e = Engine::new();
10760        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
10761        for i in 0..10 {
10762            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
10763                .unwrap();
10764        }
10765        e.execute("ANALYZE t").unwrap();
10766        let n1 = e.statistics().get("t", "id").unwrap().n_distinct;
10767        assert_eq!(n1, 10);
10768        for i in 10..30 {
10769            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
10770                .unwrap();
10771        }
10772        e.execute("ANALYZE t").unwrap();
10773        let n2 = e.statistics().get("t", "id").unwrap().n_distinct;
10774        assert_eq!(n2, 30);
10775    }
10776
10777    #[test]
10778    fn analyze_unknown_table_errors() {
10779        let mut e = Engine::new();
10780        let err = e.execute("ANALYZE nonexistent").unwrap_err();
10781        assert!(matches!(
10782            err,
10783            EngineError::Storage(StorageError::TableNotFound { .. })
10784        ));
10785    }
10786
10787    #[test]
10788    fn bare_analyze_covers_all_user_tables() {
10789        let mut e = Engine::new();
10790        e.execute("CREATE TABLE t1 (id INT NOT NULL)").unwrap();
10791        e.execute("CREATE TABLE t2 (name TEXT NOT NULL)").unwrap();
10792        e.execute("INSERT INTO t1 VALUES (1)").unwrap();
10793        e.execute("INSERT INTO t2 VALUES ('alice')").unwrap();
10794        let r = e.execute("ANALYZE").unwrap();
10795        match r {
10796            QueryResult::CommandOk {
10797                affected,
10798                modified_catalog,
10799            } => {
10800                assert_eq!(affected, 2);
10801                assert!(modified_catalog);
10802            }
10803            other => panic!("expected CommandOk, got {other:?}"),
10804        }
10805        assert!(e.statistics().get("t1", "id").is_some());
10806        assert!(e.statistics().get("t2", "name").is_some());
10807    }
10808
10809    #[test]
10810    fn select_from_spg_statistic_returns_rows_per_column() {
10811        let mut e = Engine::new();
10812        e.execute("CREATE TABLE t (id INT NOT NULL, label TEXT)")
10813            .unwrap();
10814        e.execute("INSERT INTO t VALUES (1, 'a')").unwrap();
10815        e.execute("INSERT INTO t VALUES (2, 'b')").unwrap();
10816        e.execute("ANALYZE t").unwrap();
10817        let r = e.execute_readonly("SELECT * FROM spg_statistic").unwrap();
10818        let QueryResult::Rows { rows, columns } = r else {
10819            panic!()
10820        };
10821        // v6.7.0 — spg_statistic gained a `cold_row_count` column.
10822        assert_eq!(columns.len(), 6);
10823        assert_eq!(columns[0].name, "table_name");
10824        assert_eq!(columns[4].name, "histogram_bounds");
10825        assert_eq!(columns[5].name, "cold_row_count");
10826        assert_eq!(rows.len(), 2, "one row per column of t");
10827        // Sorted by (table_name, column_name).
10828        match (&rows[0].values[0], &rows[0].values[1]) {
10829            (Value::Text(t), Value::Text(c)) => {
10830                assert_eq!(t, "t");
10831                // BTreeMap orders (table, column); columns "id" < "label".
10832                assert_eq!(c, "id");
10833            }
10834            _ => panic!(),
10835        }
10836    }
10837
10838    #[test]
10839    fn analyze_skips_vector_columns() {
10840        // Vector columns have their own stats shape (HNSW graph);
10841        // ANALYZE leaves them out of spg_statistic.
10842        let mut e = Engine::new();
10843        e.execute("CREATE TABLE t (id INT NOT NULL, v VECTOR(3) NOT NULL)")
10844            .unwrap();
10845        e.execute("INSERT INTO t VALUES (1, [1, 2, 3])").unwrap();
10846        e.execute("ANALYZE t").unwrap();
10847        assert!(e.statistics().get("t", "id").is_some());
10848        assert!(e.statistics().get("t", "v").is_none());
10849    }
10850
10851    #[test]
10852    fn statistics_persist_across_envelope_v5_round_trip() {
10853        let mut e = Engine::new();
10854        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
10855        for i in 0..20 {
10856            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
10857                .unwrap();
10858        }
10859        e.execute("ANALYZE").unwrap();
10860        let snap = e.snapshot();
10861        let e2 = Engine::restore_envelope(&snap).unwrap();
10862        let s = e2.statistics().get("t", "id").unwrap();
10863        assert_eq!(s.n_distinct, 20);
10864    }
10865
10866    // ── v6.2.1 auto-analyze threshold ───────────────────────────
10867
10868    #[test]
10869    fn auto_analyze_threshold_fires_after_10pct_of_min_rows_on_small_table() {
10870        // For a table with 0 rows then 10 inserts → modified=10,
10871        // row_count=10. Threshold = 0.1 × max(10, 100) = 10. So
10872        // after the 10th INSERT the threshold is met.
10873        let mut e = Engine::new();
10874        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
10875        for i in 0..9 {
10876            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
10877                .unwrap();
10878        }
10879        assert!(e.tables_needing_analyze().is_empty(), "9 < threshold");
10880        e.execute("INSERT INTO t VALUES (9)").unwrap();
10881        let needs = e.tables_needing_analyze();
10882        assert_eq!(needs, alloc::vec!["t".to_string()]);
10883    }
10884
10885    #[test]
10886    fn auto_analyze_threshold_uses_10pct_of_row_count_for_large_tables() {
10887        // After ANALYZE on 1000 rows, threshold = 0.1 × row_count.
10888        // Each new INSERT bumps both modified and row_count, so to
10889        // trigger from N=1000 we need modifications ≥ 0.1 × (1000+M),
10890        // i.e. M ≥ 112. The test inserts 50 (no fire), then 150
10891        // more (200 total mods, row_count=1200, threshold=120 → fire).
10892        let mut e = Engine::new();
10893        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
10894        for i in 0..1000 {
10895            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
10896                .unwrap();
10897        }
10898        e.execute("ANALYZE t").unwrap();
10899        assert!(e.tables_needing_analyze().is_empty(), "fresh ANALYZE");
10900        for i in 1000..1050 {
10901            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
10902                .unwrap();
10903        }
10904        assert!(
10905            e.tables_needing_analyze().is_empty(),
10906            "50 inserts < threshold of ~105"
10907        );
10908        for i in 1050..1200 {
10909            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
10910                .unwrap();
10911        }
10912        assert_eq!(
10913            e.tables_needing_analyze(),
10914            alloc::vec!["t".to_string()],
10915            "200 inserts > 0.1 × 1200 threshold"
10916        );
10917    }
10918
10919    #[test]
10920    fn auto_analyze_threshold_resets_after_analyze() {
10921        let mut e = Engine::new();
10922        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
10923        for i in 0..200 {
10924            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
10925                .unwrap();
10926        }
10927        assert!(!e.tables_needing_analyze().is_empty());
10928        e.execute("ANALYZE").unwrap();
10929        assert!(
10930            e.tables_needing_analyze().is_empty(),
10931            "ANALYZE must reset the counter"
10932        );
10933    }
10934
10935    #[test]
10936    fn auto_analyze_threshold_tracks_updates_and_deletes() {
10937        let mut e = Engine::new();
10938        e.execute("CREATE TABLE t (id INT NOT NULL, label TEXT)")
10939            .unwrap();
10940        for i in 0..50 {
10941            e.execute(&alloc::format!("INSERT INTO t VALUES ({i}, 'x')"))
10942                .unwrap();
10943        }
10944        e.execute("ANALYZE t").unwrap();
10945        // UPDATE 20 rows + DELETE 5 → modified=25. Threshold = 0.1
10946        // × max(50, 100) = 10. So 25 >= 10 → trigger.
10947        e.execute("UPDATE t SET label = 'y' WHERE id < 20").unwrap();
10948        e.execute("DELETE FROM t WHERE id >= 45").unwrap();
10949        assert_eq!(e.tables_needing_analyze(), alloc::vec!["t".to_string()]);
10950    }
10951
10952    #[test]
10953    fn v4_envelope_loads_with_empty_statistics() {
10954        // Forge a v4 envelope by hand: catalog + users + pubs +
10955        // subs trailer, no statistics. A v6.2.0 reader must accept
10956        // it and surface an empty Statistics.
10957        let mut e = Engine::new();
10958        e.create_user("alice", "secret", crate::users::Role::ReadOnly, [0u8; 16])
10959            .unwrap();
10960        let catalog = e.catalog.serialize();
10961        let users = crate::users::serialize_users(&e.users);
10962        let pubs = e.publications.serialize();
10963        let subs = e.subscriptions.serialize();
10964        let mut buf = Vec::new();
10965        buf.extend_from_slice(b"SPGENV01");
10966        buf.push(4u8);
10967        buf.extend_from_slice(&u32::try_from(catalog.len()).unwrap().to_le_bytes());
10968        buf.extend_from_slice(&catalog);
10969        buf.extend_from_slice(&u32::try_from(users.len()).unwrap().to_le_bytes());
10970        buf.extend_from_slice(&users);
10971        buf.extend_from_slice(&u32::try_from(pubs.len()).unwrap().to_le_bytes());
10972        buf.extend_from_slice(&pubs);
10973        buf.extend_from_slice(&u32::try_from(subs.len()).unwrap().to_le_bytes());
10974        buf.extend_from_slice(&subs);
10975        let crc = spg_crypto::crc32::crc32(&buf);
10976        buf.extend_from_slice(&crc.to_le_bytes());
10977        let e2 = Engine::restore_envelope(&buf).expect("v4 envelope restores");
10978        assert!(e2.statistics().is_empty());
10979    }
10980
10981    #[test]
10982    fn v1_v2_envelope_loads_with_empty_publications() {
10983        // A snapshot taken before v6.1.2 (no publication trailer,
10984        // envelope v2) must still deserialise — and the resulting
10985        // engine must report zero publications. Use the engine's own
10986        // round-trip with no publications: that emits v3 but with an
10987        // empty pubs block. Then forge a v2 envelope by hand to lock
10988        // the back-compat path.
10989        let mut e = Engine::new();
10990        // Force users to be non-empty so the snapshot takes the
10991        // envelope path rather than the bare-catalog fallback.
10992        e.create_user("alice", "secret", crate::users::Role::ReadOnly, [0u8; 16])
10993            .unwrap();
10994
10995        // Forge an envelope v2: same shape as v3 but no pubs trailer.
10996        let catalog = e.catalog.serialize();
10997        let users = crate::users::serialize_users(&e.users);
10998        let mut buf = Vec::new();
10999        buf.extend_from_slice(b"SPGENV01");
11000        buf.push(2u8); // v2
11001        buf.extend_from_slice(&u32::try_from(catalog.len()).unwrap().to_le_bytes());
11002        buf.extend_from_slice(&catalog);
11003        buf.extend_from_slice(&u32::try_from(users.len()).unwrap().to_le_bytes());
11004        buf.extend_from_slice(&users);
11005        let crc = spg_crypto::crc32::crc32(&buf);
11006        buf.extend_from_slice(&crc.to_le_bytes());
11007
11008        let e2 = Engine::restore_envelope(&buf).expect("v2 envelope restores");
11009        assert!(e2.publications().is_empty());
11010    }
11011}