Skip to main content

spg_engine/
lib.rs

1//! SPG execution engine — v0.3 wires the SQL front-end to the in-memory
2//! storage layer. Implements `CREATE TABLE`, single-row `INSERT VALUES`, and
3//! `SELECT * FROM <table>` (no WHERE yet — that lands in v0.4 alongside
4//! expression evaluation against rows).
5#![no_std]
6
7extern crate alloc;
8
9pub mod aggregate;
10pub mod describe;
11pub mod eval;
12pub mod fts;
13pub mod json;
14pub mod memoize;
15pub mod plan_cache;
16pub mod publications;
17pub mod query_stats;
18pub mod reorder;
19pub mod selectivity;
20pub mod statistics;
21pub mod subscriptions;
22pub mod triggers;
23pub mod users;
24
25pub use crate::users::{Role, ScramSecrets, UserError, UserStore};
26
27use alloc::borrow::Cow;
28use alloc::boxed::Box;
29use alloc::collections::BTreeMap;
30use alloc::string::{String, ToString};
31use alloc::vec::Vec;
32use core::fmt;
33
34use spg_sql::ast::{
35    BinOp, ColumnDef, ColumnName, ColumnTypeName, CreateIndexStatement, CreatePublicationStatement,
36    CreateSubscriptionStatement, CreateTableStatement, CreateUserStatement, Expr, FrameBound,
37    FrameKind, FromClause, IndexMethod, InsertStatement, JoinKind, Literal, OrderBy, SelectItem,
38    SelectStatement, Statement, TableRef, UnOp, UnionKind, VecEncoding as SqlVecEncoding,
39    WindowFrame,
40};
41// v7.16.0 — re-export the parsed-statement AST so downstream
42// crates (spg-embedded → spg-sqlx) don't need a direct dep on
43// spg-sql for the prepare/bind handle.
44pub use spg_sql::ast::Statement as ParsedStatement;
45use spg_sql::parser::{self, ParseError};
46use spg_storage::{
47    Catalog, ColumnSchema, CompactReport, DataType, IndexKey, IndexKind, Row, StorageError, Table,
48    TableSchema, Value, VecEncoding,
49};
50
51use crate::eval::{EvalContext, EvalError};
52
53/// Result of executing one statement.
54#[derive(Debug, Clone, PartialEq)]
55#[non_exhaustive]
56pub enum QueryResult {
57    /// DDL or DML succeeded.
58    ///
59    /// `affected` is the row count for `INSERT` and 0 elsewhere.
60    /// `modified_catalog` tells the server whether this statement
61    /// caused the *committed* catalog to change — it's the signal to
62    /// snapshot/audit. False for `BEGIN`/`ROLLBACK`, false for writeful
63    /// statements executed inside a transaction (those only touch the
64    /// shadow), and true for `COMMIT` and for writes outside a TX.
65    CommandOk {
66        affected: usize,
67        modified_catalog: bool,
68    },
69    /// `SELECT` returned a (possibly empty) row set.
70    Rows {
71        columns: Vec<ColumnSchema>,
72        rows: Vec<Row>,
73    },
74}
75
76/// All errors the engine can return.
77///
78/// Marked `#[non_exhaustive]` from v7.5.0 onward: external `match`
79/// must include a `_` arm so new variants in subsequent v7.x releases
80/// are not breaking changes.
81#[derive(Debug, Clone, PartialEq)]
82#[non_exhaustive]
83pub enum EngineError {
84    Parse(ParseError),
85    Storage(StorageError),
86    Eval(EvalError),
87    /// Front-end accepted a construct that the v0.x executor doesn't support.
88    Unsupported(String),
89    /// `BEGIN` while another transaction is already open.
90    TransactionAlreadyOpen,
91    /// `COMMIT` / `ROLLBACK` with no active transaction.
92    NoActiveTransaction,
93    /// v4.0 sentinel: `execute_readonly` got a statement that
94    /// mutates engine state (INSERT / CREATE / BEGIN / COMMIT / …).
95    /// The caller should retake the write lock and dispatch through
96    /// `execute(&mut self)` instead.
97    WriteRequired,
98    /// v4.2: a SELECT would have returned more rows than the
99    /// configured `max_query_rows` cap. Carries the cap.
100    RowLimitExceeded(usize),
101    /// v4.5: cooperative cancellation — the host (server's
102    /// per-query watchdog) set the cancel flag while a long-running
103    /// SELECT / UPDATE / DELETE was scanning rows. The partial work
104    /// is discarded; the caller should surface this as a timeout
105    /// to the client.
106    Cancelled,
107}
108
109impl fmt::Display for EngineError {
110    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
111        match self {
112            Self::Parse(e) => write!(f, "parse: {e}"),
113            Self::Storage(e) => write!(f, "storage: {e}"),
114            Self::Eval(e) => write!(f, "eval: {e}"),
115            Self::Unsupported(s) => write!(f, "unsupported: {s}"),
116            Self::TransactionAlreadyOpen => f.write_str("a transaction is already open"),
117            Self::NoActiveTransaction => f.write_str("no active transaction"),
118            Self::WriteRequired => {
119                f.write_str("statement requires a write lock (use execute, not execute_readonly)")
120            }
121            Self::RowLimitExceeded(n) => {
122                write!(f, "query exceeded max_query_rows={n}")
123            }
124            Self::Cancelled => f.write_str("query cancelled (timeout or client request)"),
125        }
126    }
127}
128
129impl From<ParseError> for EngineError {
130    fn from(e: ParseError) -> Self {
131        Self::Parse(e)
132    }
133}
134impl From<StorageError> for EngineError {
135    fn from(e: StorageError) -> Self {
136        Self::Storage(e)
137    }
138}
139impl From<EvalError> for EngineError {
140    fn from(e: EvalError) -> Self {
141        Self::Eval(e)
142    }
143}
144
145// NOTE: this paragraph describes `Engine` (defined further down), not `ClockFn`.
146// The execution engine holds the catalog and (later) other server-scope state.
147// `Engine::new()` is intentionally cheap so callers can construct one per database, per test.
148/// Function pointer that returns "now" as microseconds since Unix
149/// epoch. The engine is `no_std`, so it can't reach for `std::time`
150/// itself — callers (`spg-server`, the sqllogictest runner) inject a
151/// concrete implementation. `None` means `NOW()` / `CURRENT_*` raise
152/// `Unsupported`.
153pub type ClockFn = fn() -> i64;
154
155/// Function pointer that produces 16 cryptographically random bytes.
156/// Like `ClockFn`, the engine is `no_std` and can't reach for /dev/urandom
157/// itself — host (`spg-server`) injects an OS-backed source. `None`
158/// means SQL-driven `CREATE USER` falls back to a deterministic salt
159/// derived from the username (acceptable in tests; the server always
160/// installs a real RNG so production paths never see this).
161pub type SaltFn = fn() -> [u8; 16];
162
163/// v4.5 cooperative cancellation token. A long-running SELECT /
164/// UPDATE / DELETE checks `is_cancelled` at row-loop checkpoints
165/// and bails with `EngineError::Cancelled`. The host
166/// (`spg-server`) creates an `AtomicBool` per query, spawns a
167/// watchdog thread that sets it after `SPG_QUERY_TIMEOUT_MS`,
168/// and passes it via `execute_with_cancel` / `execute_readonly_with_cancel`.
169///
170/// `CancelToken::none()` is a no-op — used by the legacy `execute`
171/// and `execute_readonly` entry points so existing callers don't
172/// change.
173#[derive(Debug, Clone, Copy)]
174pub struct CancelToken<'a> {
175    flag: Option<&'a core::sync::atomic::AtomicBool>,
176}
177
178impl<'a> CancelToken<'a> {
179    #[must_use]
180    pub const fn none() -> Self {
181        Self { flag: None }
182    }
183
184    #[must_use]
185    pub const fn from_flag(f: &'a core::sync::atomic::AtomicBool) -> Self {
186        Self { flag: Some(f) }
187    }
188
189    #[must_use]
190    pub fn is_cancelled(self) -> bool {
191        self.flag
192            .is_some_and(|f| f.load(core::sync::atomic::Ordering::Relaxed))
193    }
194
195    /// Returns `Err(Cancelled)` if the token has been tripped.
196    /// Used at row-loop checkpoints to bail cooperatively without
197    /// scattering raw `is_cancelled` checks across the executor.
198    #[inline]
199    pub fn check(self) -> Result<(), EngineError> {
200        if self.is_cancelled() {
201            Err(EngineError::Cancelled)
202        } else {
203            Ok(())
204        }
205    }
206}
207
208// ---- snapshot envelope (v4.1, extended with CRC32 in v4.37,  ----
209// ----   publications in v6.1.2 v3, subscriptions in v6.1.4 v4) ----
210//
211// Wraps a catalog blob + a user blob behind a small header so the
212// server can persist both atomically without inventing a new file.
213// Bare catalog blobs (v3.x) still load via `restore_envelope` since
214// the magic check fails fast and the function falls back to
215// `Catalog::deserialize`.
216//
217// Layout — v1 (v4.1, no CRC):
218//   [8 bytes magic "SPGENV01"]
219//   [u8 version = 1]
220//   [u32 catalog_len][catalog bytes]
221//   [u32 users_len][users bytes]
222//
223// Layout — v2 (v4.37, CRC32 of body):
224//   [8 bytes magic "SPGENV01"]
225//   [u8 version = 2]
226//   [u32 catalog_len][catalog bytes]
227//   [u32 users_len][users bytes]
228//   [u32 crc32]                      ← CRC32 of every byte before it.
229//
230// Layout — v3 (v6.1.2, publications trailer):
231//   [8 bytes magic "SPGENV01"]
232//   [u8 version = 3]
233//   [u32 catalog_len][catalog bytes]
234//   [u32 users_len][users bytes]
235//   [u32 pubs_len][publications bytes]
236//   [u32 crc32]
237//
238// Layout — v4 (v6.1.4, subscriptions trailer):
239//   [8 bytes magic "SPGENV01"]
240//   [u8 version = 4]
241//   [u32 catalog_len][catalog bytes]
242//   [u32 users_len][users bytes]
243//   [u32 pubs_len][publications bytes]
244//   [u32 subs_len][subscriptions bytes]
245//   [u32 crc32]
246//
247// Layout — v5 (v6.2.0, statistics trailer):
248//   [8 bytes magic "SPGENV01"]
249//   [u8 version = 5]
250//   [u32 catalog_len][catalog bytes]
251//   [u32 users_len][users bytes]
252//   [u32 pubs_len][publications bytes]
253//   [u32 subs_len][subscriptions bytes]
254//   [u32 stats_len][statistics bytes]      ← NEW
255//   [u32 crc32]
256//
257// Writers emit v5 from v6.2.0 on. Readers accept all of {v1, v2,
258// v3, v4, v5}: v1/v2 load with empty publications / subscriptions /
259// statistics; v3 loads with empty subscriptions + statistics; v4
260// loads with empty statistics; v5 deserialises all three. Older
261// SPG versions reading a v5 envelope fall through the version
262// match to `EnvelopeParse::Bare` — pre-v6.2.0 binaries cannot
263// open v6.2.0+ snapshots (matches the v6.1.2 / v6.1.4 breaks).
264
265const ENVELOPE_MAGIC: &[u8; 8] = b"SPGENV01";
266const ENVELOPE_VERSION_V1: u8 = 1;
267const ENVELOPE_VERSION_V2: u8 = 2;
268const ENVELOPE_VERSION_V3: u8 = 3;
269const ENVELOPE_VERSION_V4: u8 = 4;
270const ENVELOPE_VERSION_V5: u8 = 5;
271
272fn build_envelope(catalog: &[u8], users: &[u8], pubs: &[u8], subs: &[u8], stats: &[u8]) -> Vec<u8> {
273    let mut out = Vec::with_capacity(
274        8 + 1
275            + 4
276            + catalog.len()
277            + 4
278            + users.len()
279            + 4
280            + pubs.len()
281            + 4
282            + subs.len()
283            + 4
284            + stats.len()
285            + 4,
286    );
287    out.extend_from_slice(ENVELOPE_MAGIC);
288    out.push(ENVELOPE_VERSION_V5);
289    out.extend_from_slice(
290        &u32::try_from(catalog.len())
291            .expect("≤ 4G catalog")
292            .to_le_bytes(),
293    );
294    out.extend_from_slice(catalog);
295    out.extend_from_slice(
296        &u32::try_from(users.len())
297            .expect("≤ 4G users")
298            .to_le_bytes(),
299    );
300    out.extend_from_slice(users);
301    out.extend_from_slice(
302        &u32::try_from(pubs.len())
303            .expect("≤ 4G publications")
304            .to_le_bytes(),
305    );
306    out.extend_from_slice(pubs);
307    out.extend_from_slice(
308        &u32::try_from(subs.len())
309            .expect("≤ 4G subscriptions")
310            .to_le_bytes(),
311    );
312    out.extend_from_slice(subs);
313    out.extend_from_slice(
314        &u32::try_from(stats.len())
315            .expect("≤ 4G statistics")
316            .to_le_bytes(),
317    );
318    out.extend_from_slice(stats);
319    let crc = spg_crypto::crc32::crc32(&out);
320    out.extend_from_slice(&crc.to_le_bytes());
321    out
322}
323
/// Outcome of parsing a snapshot buffer with `split_envelope`.
///
/// Three cases: the buffer is not a recognised envelope (`Bare`), it is
/// a well-formed v1–v5 envelope whose sections were split out (`Pair`),
/// or it is a v2+ envelope whose trailing CRC32 does not match its body
/// (`CrcMismatch`).
enum EnvelopeParse<'a> {
    /// Not a recognised envelope. Preserves v3.x readability: the buffer
    /// is treated as a bare catalog blob by the caller.
    Bare,
    /// Sections of a valid envelope, borrowed from the input buffer.
    Pair {
        /// Catalog blob (present in every envelope version).
        catalog: &'a [u8],
        /// User-store blob (present in every envelope version).
        users: &'a [u8],
        /// Publications blob; `None` for v1/v2 envelopes.
        publications: Option<&'a [u8]>,
        /// Subscriptions blob; `None` for v1–v3 envelopes.
        subscriptions: Option<&'a [u8]>,
        /// Statistics blob; `None` for v1–v4 envelopes.
        statistics: Option<&'a [u8]>,
    },
    /// The stored checksum disagrees with the one computed over the body.
    CrcMismatch {
        /// CRC32 read from the envelope trailer.
        expected: u32,
        /// CRC32 computed over the bytes preceding the trailer.
        computed: u32,
    },
}
344
345/// Returns `EnvelopeParse::Pair` for a valid v1 / v2 / v3 envelope,
346/// `Bare` for a buffer that doesn't look like an envelope (v3.x
347/// bare catalog fallback), and `CrcMismatch` for a v2/v3 envelope
348/// whose trailing CRC32 doesn't match the body.
349fn split_envelope(buf: &[u8]) -> EnvelopeParse<'_> {
350    if buf.len() < 8 + 1 + 4 || &buf[..8] != ENVELOPE_MAGIC {
351        return EnvelopeParse::Bare;
352    }
353    let version = buf[8];
354    if !matches!(
355        version,
356        ENVELOPE_VERSION_V1
357            | ENVELOPE_VERSION_V2
358            | ENVELOPE_VERSION_V3
359            | ENVELOPE_VERSION_V4
360            | ENVELOPE_VERSION_V5
361    ) {
362        return EnvelopeParse::Bare;
363    }
364    let mut p = 9usize;
365    let Some(cat_len_bytes) = buf.get(p..p + 4) else {
366        return EnvelopeParse::Bare;
367    };
368    let Ok(cat_len_arr) = cat_len_bytes.try_into() else {
369        return EnvelopeParse::Bare;
370    };
371    let cat_len = u32::from_le_bytes(cat_len_arr) as usize;
372    p += 4;
373    if p + cat_len + 4 > buf.len() {
374        return EnvelopeParse::Bare;
375    }
376    let catalog = &buf[p..p + cat_len];
377    p += cat_len;
378    let Some(user_len_bytes) = buf.get(p..p + 4) else {
379        return EnvelopeParse::Bare;
380    };
381    let Ok(user_len_arr) = user_len_bytes.try_into() else {
382        return EnvelopeParse::Bare;
383    };
384    let user_len = u32::from_le_bytes(user_len_arr) as usize;
385    p += 4;
386    if p + user_len > buf.len() {
387        return EnvelopeParse::Bare;
388    }
389    let users = &buf[p..p + user_len];
390    p += user_len;
391    let publications = if matches!(
392        version,
393        ENVELOPE_VERSION_V3 | ENVELOPE_VERSION_V4 | ENVELOPE_VERSION_V5
394    ) {
395        // [u32 pubs_len][publications bytes]
396        let Some(pubs_len_bytes) = buf.get(p..p + 4) else {
397            return EnvelopeParse::Bare;
398        };
399        let Ok(pubs_len_arr) = pubs_len_bytes.try_into() else {
400            return EnvelopeParse::Bare;
401        };
402        let pubs_len = u32::from_le_bytes(pubs_len_arr) as usize;
403        p += 4;
404        if p + pubs_len > buf.len() {
405            return EnvelopeParse::Bare;
406        }
407        let pubs_slice = &buf[p..p + pubs_len];
408        p += pubs_len;
409        Some(pubs_slice)
410    } else {
411        None
412    };
413    let subscriptions = if matches!(version, ENVELOPE_VERSION_V4 | ENVELOPE_VERSION_V5) {
414        // [u32 subs_len][subscriptions bytes]
415        let Some(subs_len_bytes) = buf.get(p..p + 4) else {
416            return EnvelopeParse::Bare;
417        };
418        let Ok(subs_len_arr) = subs_len_bytes.try_into() else {
419            return EnvelopeParse::Bare;
420        };
421        let subs_len = u32::from_le_bytes(subs_len_arr) as usize;
422        p += 4;
423        if p + subs_len > buf.len() {
424            return EnvelopeParse::Bare;
425        }
426        let subs_slice = &buf[p..p + subs_len];
427        p += subs_len;
428        Some(subs_slice)
429    } else {
430        None
431    };
432    let statistics = if version == ENVELOPE_VERSION_V5 {
433        // [u32 stats_len][statistics bytes]
434        let Some(stats_len_bytes) = buf.get(p..p + 4) else {
435            return EnvelopeParse::Bare;
436        };
437        let Ok(stats_len_arr) = stats_len_bytes.try_into() else {
438            return EnvelopeParse::Bare;
439        };
440        let stats_len = u32::from_le_bytes(stats_len_arr) as usize;
441        p += 4;
442        if p + stats_len > buf.len() {
443            return EnvelopeParse::Bare;
444        }
445        let stats_slice = &buf[p..p + stats_len];
446        p += stats_len;
447        Some(stats_slice)
448    } else {
449        None
450    };
451    if matches!(
452        version,
453        ENVELOPE_VERSION_V2 | ENVELOPE_VERSION_V3 | ENVELOPE_VERSION_V4 | ENVELOPE_VERSION_V5
454    ) {
455        if p + 4 != buf.len() {
456            return EnvelopeParse::Bare;
457        }
458        let Ok(crc_arr) = buf[p..p + 4].try_into() else {
459            return EnvelopeParse::Bare;
460        };
461        let expected = u32::from_le_bytes(crc_arr);
462        let computed = spg_crypto::crc32::crc32(&buf[..p]);
463        if expected != computed {
464            return EnvelopeParse::CrcMismatch { expected, computed };
465        }
466    } else if p != buf.len() {
467        // v1: must end exactly at the users section.
468        return EnvelopeParse::Bare;
469    }
470    EnvelopeParse::Pair {
471        catalog,
472        users,
473        publications,
474        subscriptions,
475        statistics,
476    }
477}
478
/// v4.41.1 opaque transaction handle.
///
/// Handed out by `Engine::alloc_tx_id` and threaded through
/// `Engine::execute_in` so dispatch can tell which in-flight transaction
/// a statement belongs to. `IMPLICIT_TX` is the reserved slot that every
/// legacy caller (engine self-tests, spg-cli, spg-embedded, startup
/// replay) uses implicitly through the unchanged `Engine::execute(sql)`
/// API.
///
/// v4.41.1 keeps at most one active slot at runtime (dispatch holds
/// `engine.write()` across the wrap, same as v4.34). The map shape
/// exists so v4.42 can enable N in-flight implicit transactions without
/// reshuffling engine internals.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct TxId(pub u64);
490
491/// Reserved slot used by `Engine::execute(sql)` — the legacy single-
492/// global-shadow path. New `alloc_tx_id` handles start at 1.
493pub const IMPLICIT_TX: TxId = TxId(0);
494
495/// v6.7.3 — default segment-size threshold used by `COMPACT COLD
496/// SEGMENTS` when no explicit target is supplied. Segments whose
497/// `OwnedSegment::bytes().len()` is **strictly** less than this
498/// value are eligible to merge. spg-server reads
499/// `SPG_COMPACTION_TARGET_SEGMENT_BYTES` to override.
500pub const COMPACTION_TARGET_DEFAULT_BYTES: u64 = 4 * 1024 * 1024;
501
502/// Per-slot transaction state. Held inside `tx_catalogs[tx_id]` for the
503/// lifetime of a BEGIN..COMMIT (or BEGIN..ROLLBACK) window. Drops when
504/// the TX commits (its `catalog` is moved over `Engine.catalog`) or
505/// rolls back (slot removed, catalog discarded).
506#[derive(Debug, Default, Clone)]
507struct TxState {
508    /// The TX's shadow copy of the catalog. Started as a clone of
509    /// `Engine.catalog` at BEGIN time; writes flow into it; COMMIT
510    /// installs it over `Engine.catalog`. `Catalog::clone()` is O(1)
511    /// since v4.40 (`PersistentVec` rows + `PersistentBTreeMap` indices).
512    catalog: Catalog,
513    /// Per-TX savepoint stack. Each entry pairs the savepoint name with
514    /// a clone of `catalog` at the moment `SAVEPOINT <name>` fired.
515    /// `ROLLBACK TO <name>` restores from the entry and pops everything
516    /// after it; `RELEASE <name>` discards the entry and everything
517    /// after; COMMIT/ROLLBACK clears the whole stack.
518    savepoints: Vec<(String, Catalog)>,
519}
520
521/// v7.11.0 — frozen read-only view of the engine's committed state.
522/// Constructed via [`Engine::clone_snapshot`]. Holds clones of the
523/// catalog, statistics, clock function, and row-cap config — the
524/// four fields the `execute_readonly` path actually reads. Cheap to
525/// `Clone` (each clone shares the underlying `PersistentVec` row
526/// storage; only the trie root pointers copy). Send + Sync so a
527/// snapshot can be moved across `tokio::task::spawn_blocking`
528/// boundaries without coordination.
529///
530/// The contract: a snapshot reflects the engine's state at the
531/// moment `clone_snapshot()` returned. Subsequent writes to the
532/// engine are NOT visible. Callers who need fresher data take a
533/// new snapshot.
534#[derive(Debug, Clone)]
535pub struct CatalogSnapshot {
536    catalog: Catalog,
537    statistics: statistics::Statistics,
538    clock: Option<ClockFn>,
539    max_query_rows: Option<usize>,
540}
541
542#[derive(Debug, Default)]
543pub struct Engine {
544    /// Committed catalog — what survives `Engine::snapshot()` and what
545    /// outside-TX `SELECT`s read.
546    catalog: Catalog,
547    /// Active TX slots, keyed by `TxId`. Empty when no TX is in flight.
548    /// v4.41.1 runtime invariant: at most one entry (single-writer
549    /// model unchanged). v4.42 will let dispatch hold multiple entries
550    /// concurrently for group commit + engine MVCC.
551    tx_catalogs: BTreeMap<TxId, TxState>,
552    /// Which slot the next exec_* call should mutate. Set by
553    /// `execute_in(sql, tx_id)` at the entry point; legacy `execute(sql)`
554    /// sets it to `IMPLICIT_TX`. None when no TX is in flight (read /
555    /// write goes straight against `catalog`).
556    current_tx: Option<TxId>,
557    /// Monotonic counter for `alloc_tx_id`. Starts at 1 — slot 0 is
558    /// reserved for `IMPLICIT_TX`.
559    next_tx_id: u64,
560    /// Optional wall clock used to satisfy `NOW()` / `CURRENT_TIMESTAMP`
561    /// / `CURRENT_DATE`. Set by the host environment.
562    clock: Option<ClockFn>,
563    /// v4.1 cryptographic RNG for per-user password salt. Set by the
564    /// host. `None` means SQL-driven `CREATE USER` uses a
565    /// deterministic fallback — see `SaltFn`.
566    salt_fn: Option<SaltFn>,
567    /// v4.2 per-query row cap. `None` = unlimited. When set, a
568    /// SELECT that materialises more than `n` rows returns
569    /// `EngineError::RowLimitExceeded`. Enforced before the result
570    /// is shaped into wire frames so a runaway scan can't blow the
571    /// server's heap.
572    max_query_rows: Option<usize>,
573    /// v4.1 RBAC user table. Empty means "no RBAC configured yet" —
574    /// the server decides what that means at the auth boundary
575    /// (open mode vs legacy single-password mode). User CRUD goes
576    /// through `create_user`/`drop_user`/`verify_user`; persistence
577    /// rides the snapshot envelope alongside the catalog.
578    users: UserStore,
579    /// v6.1.2 logical-replication publication catalog. Empty until
580    /// `CREATE PUBLICATION` runs. Persistence rides the v3 envelope
581    /// trailer (see `build_envelope`).
582    publications: publications::Publications,
583    /// v6.1.4 logical-replication subscription catalog. Empty until
584    /// `CREATE SUBSCRIPTION` runs. Persistence rides the v4 envelope
585    /// trailer.
586    subscriptions: subscriptions::Subscriptions,
587    /// v6.2.0 — per-column statistics for the cost-based optimizer.
588    /// Populated by `ANALYZE`; queried via `spg_statistic` virtual
589    /// table. Persistence rides the v5 envelope trailer.
590    statistics: statistics::Statistics,
591    /// v6.3.0 — engine-level plan cache. Caches the post-`prepare()`
592    /// `Statement` keyed on SQL text. In-memory only — does NOT ride
593    /// the snapshot envelope (rebuilt on demand after restart).
594    plan_cache: plan_cache::PlanCache,
595    /// v6.5.1 — per-distinct-SQL execution stats. In-memory only,
596    /// surfaced via `spg_stat_query` virtual table. Updated by the
597    /// `execute_*` paths after a successful execute.
598    query_stats: query_stats::QueryStats,
599    /// v6.5.2 — connection-state provider callback. spg-server
600    /// registers a function at startup that snapshots its
601    /// per-pgwire-connection registry into `ActivityRow`s; engine
602    /// reads through it on every `SELECT * FROM spg_stat_activity`.
603    /// `None` ⇒ no-data (returns empty rows; matches the no_std
604    /// embedded callers that don't run pgwire).
605    activity_provider: Option<ActivityProvider>,
606    /// v6.5.3 — audit-chain provider + verifier. Same pattern as
607    /// activity_provider: spg-server registers both at startup;
608    /// engine reads through on `SELECT * FROM spg_audit_chain` and
609    /// `SELECT * FROM spg_audit_verify`. `None` ⇒ no-data.
610    audit_chain_provider: Option<AuditChainProvider>,
611    audit_verifier: Option<AuditVerifier>,
612    /// v6.5.6 — slow-query log threshold in microseconds. When set,
613    /// every successful execute whose elapsed exceeds the threshold
614    /// gets fed to the registered slow-query log callback (so
615    /// spg-server can emit a structured log line). Default `None`
616    /// = no slow-query logging.
617    slow_query_threshold_us: Option<u64>,
618    slow_query_logger: Option<SlowQueryLogger>,
619    /// v7.12.1 — session parameters set via `SET <name> = <value>`.
620    /// Only `default_text_search_config` is consumed by the engine
621    /// today (the FTS function dispatcher reads it when
622    /// `to_tsvector(text)` is called without an explicit config).
623    /// All other names are accepted + recorded so PG-dump output
624    /// loads, but have no behavioural effect.
625    session_params: BTreeMap<String, String>,
626    /// v7.12.7 — depth counter for trigger-emitted embedded SQL.
627    /// Each time the engine executes a `DeferredEmbeddedStmt` it
628    /// increments this; the recursive `execute_stmt_with_cancel`
629    /// inside that path checks against [`MAX_TRIGGER_RECURSION`]
630    /// to bound runaway cascades (trigger A's UPDATE on table B
631    /// fires trigger B which UPDATEs table A which fires trigger
632    /// A again…). Reset to 0 once the original DML returns.
633    trigger_recursion_depth: u32,
634    /// v7.14.0 — when `SET FOREIGN_KEY_CHECKS=0` is in effect
635    /// (mysqldump preamble), the FK existence + arity check at
636    /// CREATE TABLE time is deferred. FKs referencing a
637    /// not-yet-existing parent land in `pending_foreign_keys`
638    /// keyed by child table; `SET FOREIGN_KEY_CHECKS=1` drains
639    /// the queue and resolves each FK against the now-complete
640    /// catalog. Empty by default; the queue is drained on every
641    /// `RESET ALL` too.
642    foreign_key_checks: bool,
643    pending_foreign_keys: Vec<(alloc::string::String, spg_sql::ast::ForeignKeyConstraint)>,
644}
645
646/// v7.12.7 — hard cap on nested trigger-emitted embedded SQL
647/// fires. 16 deep is well past anything a normal trigger graph
648/// uses while still preventing infinite-loop wedging.
649const MAX_TRIGGER_RECURSION: u32 = 16;
650
651/// v6.5.6 — callback signature for slow-query log emission. Called
652/// with `(sql, elapsed_us)` once per successful execute that crosses
653/// the threshold.
654pub type SlowQueryLogger = fn(&str, u64);
655
656/// v6.5.4 — synthesise a `CREATE TABLE` statement from catalog
657/// state. Round-trips through `Engine::execute` to recreate the
658/// same schema (sans data + indexes — indexes are emitted as a
659/// separate `CREATE INDEX` chain in `spg_database_ddl`).
660fn render_create_table(name: &str, columns: &[ColumnSchema]) -> String {
661    let mut out = alloc::format!("CREATE TABLE {name} (");
662    for (i, col) in columns.iter().enumerate() {
663        if i > 0 {
664            out.push_str(", ");
665        }
666        out.push_str(&col.name);
667        out.push(' ');
668        out.push_str(&render_data_type(col.ty));
669        if !col.nullable {
670            out.push_str(" NOT NULL");
671        }
672        if col.auto_increment {
673            out.push_str(" AUTO_INCREMENT");
674        }
675    }
676    out.push(')');
677    out
678}
679
680fn render_data_type(ty: DataType) -> String {
681    match ty {
682        DataType::SmallInt => "SMALLINT".into(),
683        DataType::Int => "INT".into(),
684        DataType::BigInt => "BIGINT".into(),
685        DataType::Float => "FLOAT".into(),
686        DataType::Text => "TEXT".into(),
687        DataType::Varchar(n) => alloc::format!("VARCHAR({n})"),
688        DataType::Char(n) => alloc::format!("CHAR({n})"),
689        DataType::Bool => "BOOL".into(),
690        DataType::Vector { dim, encoding } => match encoding {
691            spg_storage::VecEncoding::F32 => alloc::format!("VECTOR({dim})"),
692            spg_storage::VecEncoding::Sq8 => alloc::format!("VECTOR({dim}) USING SQ8"),
693            spg_storage::VecEncoding::F16 => alloc::format!("VECTOR({dim}) USING HALF"),
694        },
695        DataType::Numeric { precision, scale } => {
696            alloc::format!("NUMERIC({precision},{scale})")
697        }
698        DataType::Date => "DATE".into(),
699        DataType::Timestamp => "TIMESTAMP".into(),
700        DataType::Interval => "INTERVAL".into(),
701        DataType::Json => "JSON".into(),
702        DataType::Jsonb => "JSONB".into(),
703        DataType::Timestamptz => "TIMESTAMPTZ".into(),
704        DataType::Bytes => "BYTEA".into(),
705        DataType::TextArray => "TEXT[]".into(),
706        DataType::IntArray => "INT[]".into(),
707        DataType::BigIntArray => "BIGINT[]".into(),
708        DataType::TsVector => "TSVECTOR".into(),
709        DataType::TsQuery => "TSQUERY".into(),
710    }
711}
712
/// v6.5.2 — a single row of the `spg_stat_activity` virtual table.
///
/// Public so spg-server can build rows itself without the engine
/// re-exporting internal dispatch types.
/// NOTE(review): the per-field descriptions below are inferred from the
/// field names; confirm against the spg-server provider.
#[derive(Debug, Clone)]
pub struct ActivityRow {
    /// Connection / backend identifier.
    pub pid: u32,
    /// User name associated with the connection.
    pub user: String,
    /// Start timestamp in microseconds (per the `_us` suffix).
    pub started_at_us: i64,
    /// SQL text currently associated with the connection.
    pub current_sql: String,
    /// Wait-event label.
    pub wait_event: String,
    /// Elapsed time in microseconds (per the `_us` suffix).
    pub elapsed_us: i64,
    /// Whether the connection is inside a transaction.
    pub in_transaction: bool,
}
726
727/// v6.5.2 — provider callback type. Fresh snapshot returned each
728/// call; engine doesn't cache the slice.
729pub type ActivityProvider = fn() -> Vec<ActivityRow>;
730
/// v6.5.3 — a single row of the `spg_audit_chain` virtual table.
///
/// Public so spg-server can build rows directly from its `AuditEntry`.
/// NOTE(review): the per-field descriptions below are inferred from the
/// field names; confirm against the spg-server provider.
#[derive(Debug, Clone)]
pub struct AuditRow {
    /// Sequence number of the entry in the chain.
    pub seq: i64,
    /// Entry timestamp in milliseconds (per the `_ms` suffix).
    pub ts_ms: i64,
    /// Hex-encoded hash of the previous entry.
    pub prev_hash_hex: String,
    /// Hex-encoded hash of this entry.
    pub entry_hash_hex: String,
    /// SQL text recorded for the entry.
    pub sql: String,
}
741
742/// v6.5.3 — chain-table provider + verifier. spg-server registers
743/// fn pointers that snapshot / verify the audit log. `verify`
744/// returns `(verified_count, broken_at_seq)` — `broken_at_seq` is
745/// `-1` on a clean chain.
746pub type AuditChainProvider = fn() -> Vec<AuditRow>;
747pub type AuditVerifier = fn() -> (i64, i64);
748
749impl Engine {
    /// Build a fresh engine: a new `Catalog`, no transaction slots, no
    /// host hooks (clock / salt RNG / providers / slow-query logger),
    /// no row cap, and new (default-constructed) user, publication,
    /// subscription, statistics, plan-cache and query-stats stores.
    ///
    /// Hooks and limits are attached afterwards through the `with_*`
    /// builders.
    pub fn new() -> Self {
        Self {
            catalog: Catalog::new(),
            // No TX slots yet; `alloc_tx_id` hands out ids starting at 1.
            tx_catalogs: BTreeMap::new(),
            current_tx: None,
            next_tx_id: 1,
            // Unset until `with_clock` / `with_salt_fn` /
            // `with_max_query_rows` are applied.
            clock: None,
            salt_fn: None,
            max_query_rows: None,
            users: UserStore::new(),
            publications: publications::Publications::new(),
            subscriptions: subscriptions::Subscriptions::new(),
            statistics: statistics::Statistics::new(),
            plan_cache: plan_cache::PlanCache::new(),
            query_stats: query_stats::QueryStats::new(),
            // Server-side providers / loggers start unregistered.
            activity_provider: None,
            audit_chain_provider: None,
            audit_verifier: None,
            slow_query_threshold_us: None,
            slow_query_logger: None,
            session_params: BTreeMap::new(),
            trigger_recursion_depth: 0,
            // Foreign-key checking starts switched on.
            foreign_key_checks: true,
            pending_foreign_keys: Vec::new(),
        }
    }
776
777    /// v7.11.0 — clone the engine's committed catalog + read-time
778    /// state into a frozen `CatalogSnapshot`. Cheap (`Catalog` is
779    /// backed by `PersistentVec`; cloning is O(log n) per table).
780    /// Subsequent writes to this engine are invisible to the
781    /// snapshot; the snapshot is self-contained and can be moved
782    /// to another thread for concurrent `execute_readonly_on_snapshot`
783    /// calls. The basis for [`AsyncReadHandle`] in spg-embedded-tokio
784    /// and any other read-fanout pattern.
785    #[must_use]
786    pub fn clone_snapshot(&self) -> CatalogSnapshot {
787        CatalogSnapshot {
788            catalog: self.active_catalog().clone(),
789            statistics: self.statistics.clone(),
790            clock: self.clock,
791            max_query_rows: self.max_query_rows,
792        }
793    }
794
795    /// v7.11.1 — execute a read-only SQL statement against a
796    /// `CatalogSnapshot` without touching this engine. Same
797    /// semantics as `execute_readonly` but parameterised on the
798    /// snapshot's catalog. Reject DDL/DML the same way
799    /// `execute_readonly` does. Static-on-Self so the caller can
800    /// dispatch without holding an `Engine` borrow alongside the
801    /// snapshot.
802    pub fn execute_readonly_on_snapshot(
803        snapshot: &CatalogSnapshot,
804        sql: &str,
805    ) -> Result<QueryResult, EngineError> {
806        Self::execute_readonly_on_snapshot_with_cancel(snapshot, sql, CancelToken::none())
807    }
808
809    /// v7.11.1 — `execute_readonly_on_snapshot` with cooperative
810    /// cancellation. Builds a transient `Engine` over the snapshot
811    /// state, runs `execute_readonly_with_cancel`, drops. The
812    /// transient engine is cheap to construct (no I/O; everything
813    /// is just struct moves) and lets the existing read path stay
814    /// untouched.
815    pub fn execute_readonly_on_snapshot_with_cancel(
816        snapshot: &CatalogSnapshot,
817        sql: &str,
818        cancel: CancelToken<'_>,
819    ) -> Result<QueryResult, EngineError> {
820        let transient = Engine {
821            catalog: snapshot.catalog.clone(),
822            statistics: snapshot.statistics.clone(),
823            clock: snapshot.clock,
824            max_query_rows: snapshot.max_query_rows,
825            ..Engine::default()
826        };
827        transient.execute_readonly_with_cancel(sql, cancel)
828    }
829
830    /// Construct an engine restored from a previously-snapshotted catalog
831    /// (see `snapshot()`).
832    pub fn restore(catalog: Catalog) -> Self {
833        Self {
834            catalog,
835            tx_catalogs: BTreeMap::new(),
836            current_tx: None,
837            next_tx_id: 1,
838            clock: None,
839            salt_fn: None,
840            max_query_rows: None,
841            users: UserStore::new(),
842            publications: publications::Publications::new(),
843            subscriptions: subscriptions::Subscriptions::new(),
844            statistics: statistics::Statistics::new(),
845            plan_cache: plan_cache::PlanCache::new(),
846            query_stats: query_stats::QueryStats::new(),
847            activity_provider: None,
848            audit_chain_provider: None,
849            audit_verifier: None,
850            slow_query_threshold_us: None,
851            slow_query_logger: None,
852            session_params: BTreeMap::new(),
853            trigger_recursion_depth: 0,
854            foreign_key_checks: true,
855            pending_foreign_keys: Vec::new(),
856        }
857    }
858
859    /// Restore an engine + user table from a v4.1 envelope produced
860    /// by `snapshot_with_users()`. Falls back to plain catalog-only
861    /// restore if the envelope magic isn't present (so v3.x snapshot
862    /// files still load). v6.1.2 adds the optional publications
863    /// trailer (envelope v3); a v1/v2 envelope deserialises to an
864    /// empty publication table.
865    pub fn restore_envelope(buf: &[u8]) -> Result<Self, EngineError> {
866        match split_envelope(buf) {
867            EnvelopeParse::Pair {
868                catalog: catalog_bytes,
869                users: user_bytes,
870                publications: pub_bytes,
871                subscriptions: sub_bytes,
872                statistics: stats_bytes,
873            } => {
874                let catalog = Catalog::deserialize(catalog_bytes).map_err(EngineError::Storage)?;
875                let users = users::deserialize_users(user_bytes)
876                    .map_err(|e| EngineError::Unsupported(alloc::format!("users restore: {e}")))?;
877                let publications = match pub_bytes {
878                    Some(b) => publications::Publications::deserialize(b).map_err(|e| {
879                        EngineError::Unsupported(alloc::format!("publications restore: {e:?}"))
880                    })?,
881                    None => publications::Publications::new(),
882                };
883                let subscriptions = match sub_bytes {
884                    Some(b) => subscriptions::Subscriptions::deserialize(b).map_err(|e| {
885                        EngineError::Unsupported(alloc::format!("subscriptions restore: {e:?}"))
886                    })?,
887                    None => subscriptions::Subscriptions::new(),
888                };
889                let statistics = match stats_bytes {
890                    Some(b) => statistics::Statistics::deserialize(b).map_err(|e| {
891                        EngineError::Unsupported(alloc::format!("statistics restore: {e:?}"))
892                    })?,
893                    None => statistics::Statistics::new(),
894                };
895                Ok(Self {
896                    catalog,
897                    tx_catalogs: BTreeMap::new(),
898                    current_tx: None,
899                    next_tx_id: 1,
900                    clock: None,
901                    salt_fn: None,
902                    max_query_rows: None,
903                    users,
904                    publications,
905                    subscriptions,
906                    statistics,
907                    plan_cache: plan_cache::PlanCache::new(),
908                    query_stats: query_stats::QueryStats::new(),
909                    activity_provider: None,
910                    audit_chain_provider: None,
911                    audit_verifier: None,
912                    slow_query_threshold_us: None,
913                    slow_query_logger: None,
914                    session_params: BTreeMap::new(),
915                    trigger_recursion_depth: 0,
916            foreign_key_checks: true,
917            pending_foreign_keys: Vec::new(),
918                })
919            }
920            EnvelopeParse::CrcMismatch { expected, computed } => {
921                Err(EngineError::Storage(StorageError::Corrupt(alloc::format!(
922                    "snapshot envelope CRC32 mismatch (expected={expected:#010x}, computed={computed:#010x})"
923                ))))
924            }
925            EnvelopeParse::Bare => {
926                let catalog = Catalog::deserialize(buf).map_err(EngineError::Storage)?;
927                Ok(Self::restore(catalog))
928            }
929        }
930    }
931
    /// Shared borrow of the engine's `UserStore`.
    pub const fn users(&self) -> &UserStore {
        &self.users
    }
935
936    /// `salt` is supplied by the caller (the host has a random
937    /// source; the engine is `no_std`). Caller should pass a fresh
938    /// 16-byte random value per user.
939    pub fn create_user(
940        &mut self,
941        name: &str,
942        password: &str,
943        role: Role,
944        salt: [u8; 16],
945    ) -> Result<(), UserError> {
946        self.users.create(name, password, role, salt)?;
947        // v4.8: also derive SCRAM-SHA-256 secrets so PG-wire SASL
948        // auth can verify without re-running PBKDF2 per attempt.
949        // Uses a fresh salt from the host RNG (falls back to a
950        // deterministic per-username salt when no RNG is wired, same
951        // as the legacy hash path).
952        let scram_salt = self.salt_fn.map_or_else(
953            || {
954                let mut s = [0u8; users::SCRAM_SALT_LEN];
955                let digest = spg_crypto::hash(name.as_bytes());
956                // Use bytes 16..32 of BLAKE3 so we don't reuse the
957                // exact same fallback salt as the BLAKE3 hash path.
958                s.copy_from_slice(&digest[16..32]);
959                s
960            },
961            |f| f(),
962        );
963        self.users
964            .enable_scram(name, password, scram_salt, users::SCRAM_DEFAULT_ITERS)?;
965        Ok(())
966    }
967
    /// Remove the user called `name`; a thin delegate to
    /// `UserStore::drop`, whose `UserError` is returned unchanged.
    pub fn drop_user(&mut self, name: &str) -> Result<(), UserError> {
        self.users.drop(name)
    }
971
    /// Check `name` / `password` against the user store; a thin
    /// delegate to `UserStore::verify`. Presumably `Some(role)` means
    /// the credentials matched and `None` that they did not — confirm
    /// against `UserStore::verify`.
    pub fn verify_user(&self, name: &str, password: &str) -> Option<Role> {
        self.users.verify(name, password)
    }
975
    /// Builder: attach a wall clock so `NOW()` / `CURRENT_TIMESTAMP` /
    /// `CURRENT_DATE` evaluate to a real value instead of erroring out.
    /// Consumes the engine and returns it with `clock` set.
    #[must_use]
    pub const fn with_clock(mut self, clock: ClockFn) -> Self {
        self.clock = Some(clock);
        self
    }
983
    /// Builder: attach an OS-backed RNG for per-user password salts.
    /// The host (`spg-server`) typically wires this to `/dev/urandom`.
    /// Consumes the engine and returns it with `salt_fn` set; the hook
    /// is read by `create_user` when deriving the SCRAM salt.
    #[must_use]
    pub const fn with_salt_fn(mut self, f: SaltFn) -> Self {
        self.salt_fn = Some(f);
        self
    }
991
    /// Builder: cap the number of rows a single SELECT may return.
    /// Exceeding the cap raises `EngineError::RowLimitExceeded` —
    /// the bound is checked inside the executor so a runaway
    /// catalog scan can't allocate millions of rows before the
    /// server gets a chance to reject the result.
    /// Consumes the engine and returns it with `max_query_rows` set
    /// to `Some(n)`.
    #[must_use]
    pub const fn with_max_query_rows(mut self, n: usize) -> Self {
        self.max_query_rows = Some(n);
        self
    }
1002
    /// The *committed* catalog (the `catalog` field itself, never a
    /// transaction shadow). Note: during a transaction this returns the
    /// pre-TX state — `SELECT` inside a TX goes through `execute()` and
    /// reads the shadow. Tests that inspect outside-TX state should use
    /// this.
    pub const fn catalog(&self) -> &Catalog {
        &self.catalog
    }
1009
1010    /// Serialize the *committed* catalog to bytes. v0.6 was full-snapshot; v0.9
1011    /// adds the rule that an open TX's shadow is never snapshotted — only the
1012    /// post-COMMIT state is persisted. v4.1 wraps the catalog in an envelope
1013    /// when there are users to persist; an empty user table snapshots as the
1014    /// bare catalog format (backwards-compat with v3.x readers). v6.1.2
1015    /// adds publications to the envelope condition: either non-empty
1016    /// users OR non-empty publications now triggers the envelope path.
1017    pub fn snapshot(&self) -> Vec<u8> {
1018        if self.users.is_empty()
1019            && self.publications.is_empty()
1020            && self.subscriptions.is_empty()
1021            && self.statistics.is_empty()
1022        {
1023            self.catalog.serialize()
1024        } else {
1025            build_envelope(
1026                &self.catalog.serialize(),
1027                &users::serialize_users(&self.users),
1028                &self.publications.serialize(),
1029                &self.subscriptions.serialize(),
1030                &self.statistics.serialize(),
1031            )
1032        }
1033    }
1034
1035    /// True when at least one TX slot is in flight. v4.41.1 runtime
1036    /// invariant: at most one slot active at a time (dispatch holds
1037    /// `engine.write()` across the entire wrap). v4.42 will let this
1038    /// return true with multiple slots concurrently.
1039    pub fn in_transaction(&self) -> bool {
1040        !self.tx_catalogs.is_empty()
1041    }
1042
1043    /// v4.41.1 allocate a fresh TX handle. Used by spg-server dispatch
1044    /// to scope each implicit-wrap BEGIN..stmt..COMMIT to its own slot
1045    /// in `tx_catalogs`. v4.42 — the commit-barrier leader allocates
1046    /// one of these per task in its group, runs `BEGIN`+sql+`COMMIT`
1047    /// sequentially under a single `engine.write()` so each task's
1048    /// mutations accumulate into shared state, then either keeps the
1049    /// accumulated state (fsync OK) or restores the pre-image via
1050    /// `replace_catalog` (fsync err).
1051    pub fn alloc_tx_id(&mut self) -> TxId {
1052        let id = TxId(self.next_tx_id);
1053        self.next_tx_id = self.next_tx_id.saturating_add(1);
1054        id
1055    }
1056
    /// v4.42 — replace the live (committed) catalog with `catalog`;
    /// the previous value is dropped by the assignment.
    ///
    /// Used by the commit-barrier leader to roll back a group whose
    /// batched fsync failed: the leader snapshots
    /// `engine.catalog().clone()` (O(1) Arc bump after the
    /// v4.39/v4.40 persistent migration) at group start, sequentially
    /// applies each task's BEGIN+sql+COMMIT under the same write lock
    /// to accumulate mutations into shared state, batches the WAL
    /// bytes, fsyncs once, and on failure calls this with the
    /// pre-image to undo every task in the group at once.
    ///
    /// **Does NOT touch `tx_catalogs` / `current_tx`.** Any
    /// explicit-TX slot from a concurrent client (created via the
    /// legacy `IMPLICIT_TX`-less dispatch path or via the future
    /// MVCC-readers v5+ work) has its own snapshot baked into the
    /// slot — restoring `self.catalog` to the pre-image leaves
    /// those slots untouched, exactly as they were when the leader
    /// took the lock. The leader's own implicit-TX slots are all
    /// already discarded (`exec_commit` removed them as each
    /// task's COMMIT ran) by the time this is reached.
    pub fn replace_catalog(&mut self, catalog: Catalog) {
        self.catalog = catalog;
    }
1079
1080    /// v6.7.0 — public shim around `Catalog::freeze_oldest_to_cold`
1081    /// so tests + the spg-server freezer can drive a freeze without
1082    /// reaching into the private `active_catalog_mut`. v6.7.4
1083    /// parallel freezer will build on this surface.
1084    ///
1085    /// Marks the table's cached `cold_row_count` stale because the
1086    /// freeze added cold locators that ANALYZE hasn't yet refreshed.
1087    pub fn freeze_oldest_to_cold(
1088        &mut self,
1089        table_name: &str,
1090        index_name: &str,
1091        max_rows: usize,
1092    ) -> Result<spg_storage::FreezeReport, EngineError> {
1093        let report = self
1094            .active_catalog_mut()
1095            .freeze_oldest_to_cold(table_name, index_name, max_rows)
1096            .map_err(EngineError::Storage)?;
1097        if let Some(t) = self.active_catalog_mut().get_mut(table_name) {
1098            t.mark_cold_row_count_stale();
1099        }
1100        Ok(report)
1101    }
1102
1103    /// v6.7.5 — public shim used by the spg-server follower's
1104    /// segment-forwarding receiver. Registers a cold-tier segment
1105    /// at a specific id (the master's id, as transmitted on the
1106    /// wire) so the follower's BTree-Cold locators stay byte-
1107    /// identical with the master's. Wraps
1108    /// `Catalog::load_segment_bytes_at` under the standard
1109    /// clone-mutate-replace pattern.
1110    ///
1111    /// Returns `Ok(())` on success **and** on the "slot already
1112    /// occupied" case — a follower mid-reconnect may receive a
1113    /// segment chunk for a segment_id it already has on disk
1114    /// (forwarded last session); the caller should treat that
1115    /// path as a no-op rather than a fatal error.
1116    pub fn receive_cold_segment(
1117        &mut self,
1118        segment_id: u32,
1119        bytes: Vec<u8>,
1120    ) -> Result<(), EngineError> {
1121        let mut new_cat = self.catalog.clone();
1122        match new_cat.load_segment_bytes_at(segment_id, bytes) {
1123            Ok(()) => {
1124                self.replace_catalog(new_cat);
1125                Ok(())
1126            }
1127            Err(StorageError::Corrupt(msg)) if msg.contains("already occupied") => Ok(()),
1128            Err(e) => Err(EngineError::Storage(e)),
1129        }
1130    }
1131
1132    /// v6.7.3 — public shim around `Catalog::compact_cold_segments`
1133    /// driving every BTree index on every user table. Returns one
1134    /// `(table, index, report)` triple for each merge that
1135    /// actually happened (no-op (table, index) pairs are filtered
1136    /// out so callers can size persist-side work to the live
1137    /// merges). Caller is responsible for persisting each
1138    /// `report.merged_segment_bytes` and updating the on-disk
1139    /// segment registry; engine layer is no_std and never
1140    /// touches disk.
1141    ///
1142    /// Marks every touched table's cached `cold_row_count` stale
1143    /// — compaction GC'd some shadowed rows, so the count must be
1144    /// re-derived on the next ANALYZE.
1145    pub fn compact_cold_segments_with_target(
1146        &mut self,
1147        target_segment_bytes: u64,
1148    ) -> Result<Vec<(String, String, CompactReport)>, EngineError> {
1149        let table_names = self.active_catalog().table_names();
1150        let mut reports: Vec<(String, String, CompactReport)> = Vec::new();
1151        for tname in table_names {
1152            if is_internal_table_name(&tname) {
1153                continue;
1154            }
1155            let idx_names: Vec<String> = {
1156                let Some(t) = self.active_catalog().get(&tname) else {
1157                    continue;
1158                };
1159                t.indices()
1160                    .iter()
1161                    .filter(|i| matches!(i.kind, IndexKind::BTree(_)))
1162                    .map(|i| i.name.clone())
1163                    .collect()
1164            };
1165            for iname in idx_names {
1166                let report = self
1167                    .active_catalog_mut()
1168                    .compact_cold_segments(&tname, &iname, target_segment_bytes)
1169                    .map_err(EngineError::Storage)?;
1170                if report.merged_segment_id.is_some() {
1171                    if let Some(t) = self.active_catalog_mut().get_mut(&tname) {
1172                        t.mark_cold_row_count_stale();
1173                    }
1174                    reports.push((tname.clone(), iname, report));
1175                }
1176            }
1177        }
1178        Ok(reports)
1179    }
1180
1181    fn active_catalog(&self) -> &Catalog {
1182        match self.current_tx {
1183            Some(t) => self
1184                .tx_catalogs
1185                .get(&t)
1186                .map_or(&self.catalog, |s| &s.catalog),
1187            None => &self.catalog,
1188        }
1189    }
1190
1191    /// v7.12.4 — snapshot every row-level trigger on `table` that
1192    /// fires for `event` (`"INSERT"` / `"UPDATE"` / `"DELETE"`) at
1193    /// the given `timing` (`"BEFORE"` / `"AFTER"`), and clone its
1194    /// referenced function definition. Returned as a vec of owned
1195    /// `FunctionDef` so the row-write loop can fire them without
1196    /// holding a borrow on the catalog (which would conflict with
1197    /// the table.insert / update_row / delete mutable borrows).
1198    fn snapshot_row_triggers(
1199        &self,
1200        table: &str,
1201        event: &str,
1202        timing: &str,
1203    ) -> Vec<spg_storage::FunctionDef> {
1204        let cat = self.active_catalog();
1205        cat.triggers()
1206            .iter()
1207            .filter(|t| {
1208                // v7.16.1 — skip disabled triggers (mailrs
1209                // round-9 A.2.b — pg_dump --disable-triggers).
1210                t.enabled
1211                    && t.table == table
1212                    && t.timing.eq_ignore_ascii_case(timing)
1213                    && t.for_each.eq_ignore_ascii_case("row")
1214                    && t.events.iter().any(|e| e.eq_ignore_ascii_case(event))
1215            })
1216            .filter_map(|t| cat.functions().get(&t.function).cloned())
1217            .collect()
1218    }
1219
1220    /// v7.13.0 — UPDATE-side snapshot that pairs each trigger's
1221    /// function with its `UPDATE OF cols` filter (mailrs round-5
1222    /// G7). Empty filter Vec means "fire unconditionally", matching
1223    /// the v7.12 behaviour.
1224    fn snapshot_update_row_triggers(
1225        &self,
1226        table: &str,
1227        timing: &str,
1228    ) -> Vec<(spg_storage::FunctionDef, Vec<String>)> {
1229        let cat = self.active_catalog();
1230        cat.triggers()
1231            .iter()
1232            .filter(|t| {
1233                // v7.16.1 — skip disabled triggers.
1234                t.enabled
1235                    && t.table == table
1236                    && t.timing.eq_ignore_ascii_case(timing)
1237                    && t.for_each.eq_ignore_ascii_case("row")
1238                    && t.events.iter().any(|e| e.eq_ignore_ascii_case("UPDATE"))
1239            })
1240            .filter_map(|t| {
1241                cat.functions()
1242                    .get(&t.function)
1243                    .cloned()
1244                    .map(|fd| (fd, t.update_columns.clone()))
1245            })
1246            .collect()
1247    }
1248
1249    /// v7.12.7 — drain the trigger-emitted embedded SQL queue.
1250    /// Called by the INSERT / UPDATE / DELETE executors after
1251    /// their main row-write loop returns. Each statement runs
1252    /// inside the same cancel scope as the firing DML and bumps
1253    /// the recursion counter; nested embedded SQL beyond
1254    /// [`MAX_TRIGGER_RECURSION`] errors with a clear message so
1255    /// a trigger-graph cycle surfaces as a query failure instead
1256    /// of stack-blowing the engine.
1257    fn execute_deferred_trigger_stmts(
1258        &mut self,
1259        deferred: Vec<triggers::DeferredEmbeddedStmt>,
1260        cancel: CancelToken<'_>,
1261    ) -> Result<(), EngineError> {
1262        for d in deferred {
1263            if self.trigger_recursion_depth >= MAX_TRIGGER_RECURSION {
1264                return Err(EngineError::Storage(StorageError::Corrupt(alloc::format!(
1265                    "trigger embedded SQL recursion depth {} exceeded (trigger function \
1266                     {:?} would push past the {} cap — check for trigger cycles)",
1267                    self.trigger_recursion_depth,
1268                    d.function,
1269                    MAX_TRIGGER_RECURSION,
1270                ))));
1271            }
1272            self.trigger_recursion_depth += 1;
1273            let res = self.execute_stmt_with_cancel(d.stmt, cancel);
1274            self.trigger_recursion_depth -= 1;
1275            res?;
1276        }
1277        Ok(())
1278    }
1279
1280    fn active_catalog_mut(&mut self) -> &mut Catalog {
1281        let tx = self.current_tx;
1282        match tx {
1283            Some(t) => match self.tx_catalogs.get_mut(&t) {
1284                Some(s) => &mut s.catalog,
1285                None => &mut self.catalog,
1286            },
1287            None => &mut self.catalog,
1288        }
1289    }
1290
1291    /// Read-only execute path. Succeeds for `SELECT` / `SHOW TABLES`
1292    /// / `SHOW COLUMNS`; returns `EngineError::WriteRequired` for
1293    /// every other statement, so the caller can fall through to the
1294    /// `&mut self` `execute` path under a write lock. Engine state is
1295    /// not mutated even on the success path (`rewrite_clock_calls`
1296    /// and `resolve_order_by_position` both mutate the locally-owned
1297    /// AST, not `self`).
1298    ///
1299    /// **v4.0 concurrency**: this is the entry point the server takes
1300    /// under an `RwLock::read()` so multiple `SELECT` clients run in
1301    /// parallel without serialising on a single mutex.
1302    pub fn execute_readonly(&self, sql: &str) -> Result<QueryResult, EngineError> {
1303        self.execute_readonly_with_cancel(sql, CancelToken::none())
1304    }
1305
1306    /// v4.5 — read path with cooperative cancellation. Token's
1307    /// `is_cancelled` is checked at the start (so a watchdog that
1308    /// already fired returns Cancelled immediately) and at row-loop
1309    /// checkpoints inside `exec_select`. SHOW paths are O(small) and
1310    /// don't bother checking.
1311    pub fn execute_readonly_with_cancel(
1312        &self,
1313        sql: &str,
1314        cancel: CancelToken<'_>,
1315    ) -> Result<QueryResult, EngineError> {
1316        cancel.check()?;
1317        let mut stmt = parser::parse_statement(sql)?;
1318        let now_micros = self.clock.map(|f| f());
1319        rewrite_clock_calls(&mut stmt, now_micros);
1320        if let Statement::Select(s) = &mut stmt {
1321            resolve_order_by_position(s);
1322            // v6.2.3 — cost-based JOIN reorder (read path).
1323            reorder::reorder_joins(s, &self.catalog, &self.statistics);
1324        }
1325        let result = match stmt {
1326            Statement::Select(s) => self.exec_select_cancel(&s, cancel),
1327            Statement::ShowTables => Ok(self.exec_show_tables()),
1328            Statement::ShowColumns(table) => self.exec_show_columns(&table),
1329            Statement::ShowUsers => Ok(self.exec_show_users()),
1330            Statement::ShowPublications => Ok(self.exec_show_publications()),
1331            Statement::ShowSubscriptions => Ok(self.exec_show_subscriptions()),
1332            Statement::WaitForWalPosition { .. } => Err(EngineError::Unsupported(
1333                "WAIT FOR WAL POSITION must be handled by the server layer".into(),
1334            )),
1335            Statement::Explain(e) => self.exec_explain(&e, cancel),
1336            _ => Err(EngineError::WriteRequired),
1337        };
1338        self.enforce_row_limit(result)
1339    }
1340
1341    /// v4.2: cap result-set size. Applied after the executor
1342    /// materialises rows but before they leave the engine — wrapping
1343    /// every Rows-returning exec_* function would scatter the check.
1344    fn enforce_row_limit(
1345        &self,
1346        result: Result<QueryResult, EngineError>,
1347    ) -> Result<QueryResult, EngineError> {
1348        if let (Ok(QueryResult::Rows { rows, .. }), Some(cap)) = (&result, self.max_query_rows)
1349            && rows.len() > cap
1350        {
1351            return Err(EngineError::RowLimitExceeded(cap));
1352        }
1353        result
1354    }
1355
1356    pub fn execute(&mut self, sql: &str) -> Result<QueryResult, EngineError> {
1357        self.execute_in_with_cancel(sql, IMPLICIT_TX, CancelToken::none())
1358    }
1359
1360    /// v4.5 — write path with cooperative cancellation. Same dispatch
1361    /// as `execute_in_with_cancel(sql, IMPLICIT_TX, cancel)`. Kept as
1362    /// a separate entry point for backward-compat with the v4.5
1363    /// public API.
1364    pub fn execute_with_cancel(
1365        &mut self,
1366        sql: &str,
1367        cancel: CancelToken<'_>,
1368    ) -> Result<QueryResult, EngineError> {
1369        self.execute_in_with_cancel(sql, IMPLICIT_TX, cancel)
1370    }
1371
1372    /// v4.41.1 multi-slot write entry. Routes `sql` through the TX
1373    /// slot identified by `tx_id` so spg-server dispatch can scope
1374    /// each implicit-wrap BEGIN..stmt..COMMIT to its own slot in
1375    /// `tx_catalogs`. `IMPLICIT_TX` is the legacy single-slot path
1376    /// every other caller (engine self-tests, replay, spg-embedded)
1377    /// implicitly takes via `execute()` / `execute_with_cancel()`.
1378    pub fn execute_in(&mut self, sql: &str, tx_id: TxId) -> Result<QueryResult, EngineError> {
1379        self.execute_in_with_cancel(sql, tx_id, CancelToken::none())
1380    }
1381
1382    /// v4.41.1 write path with cooperative cancellation + explicit TX
1383    /// scope. Sets `self.current_tx` for the duration of the call so
1384    /// every `exec_*` helper transparently sees its TX's shadow
1385    /// catalog and savepoint stack; restores on exit so the field is
1386    /// only valid mid-call (no leakage across calls).
1387    pub fn execute_in_with_cancel(
1388        &mut self,
1389        sql: &str,
1390        tx_id: TxId,
1391        cancel: CancelToken<'_>,
1392    ) -> Result<QueryResult, EngineError> {
1393        let saved = self.current_tx;
1394        self.current_tx = Some(tx_id);
1395        let result = self.execute_inner_with_cancel(sql, cancel);
1396        self.current_tx = saved;
1397        result
1398    }
1399
1400    /// v6.1.1 — parse and pre-process a SQL string ONCE so the
1401    /// resulting [`Statement`] can be cached and re-executed via
1402    /// [`Engine::execute_prepared`]. Returns the same `Statement`
1403    /// the simple-query path would synthesise internally (clock
1404    /// rewrites + ORDER BY position-ref resolution applied at
1405    /// prepare time, since both are session-independent). The
1406    /// `$N` placeholders in the SQL stay as `Expr::Placeholder(n)`
1407    /// nodes; they're resolved to concrete values per-call by
1408    /// `execute_prepared`'s substitution walk.
1409    ///
1410    /// Pgwire's `Parse` (P) message lands here.
1411    pub fn prepare(&self, sql: &str) -> Result<Statement, ParseError> {
1412        let mut stmt = parser::parse_statement(sql)?;
1413        let now_micros = self.clock.map(|f| f());
1414        rewrite_clock_calls(&mut stmt, now_micros);
1415        if let Statement::Select(s) = &mut stmt {
1416            // v6.4.1 — expand `GROUP BY ALL` to every non-aggregate
1417            // SELECT-list item BEFORE position / alias resolution so
1418            // downstream passes see the explicit list.
1419            expand_group_by_all(s);
1420            resolve_order_by_position(s);
1421            // v6.2.3 — cost-based JOIN reorder. No-op for
1422            // single-table FROMs or any non-INNER join shape.
1423            reorder::reorder_joins(s, &self.catalog, &self.statistics);
1424        }
1425        Ok(stmt)
1426    }
1427
1428    /// v6.3.0 — cached prepare. Returns a cloned `Statement` from
1429    /// the plan cache on hit, runs the full `prepare()` path on miss
1430    /// and inserts the resulting plan before returning. Skipping the
1431    /// parse + JOIN-reorder pipeline on hit is the dominant win for
1432    /// JDBC / sqlx / pgx clients that reuse the same SQL string.
1433    ///
1434    /// Returns a cloned `Statement` (not a borrow) because the
1435    /// pgwire layer owns its `PreparedStmt` map per-session and the
1436    /// engine-level cache must stay available for other sessions.
1437    /// Clone cost on a 5-table JOIN AST is well under the parse cost
1438    /// it replaces.
1439    pub fn prepare_cached(&mut self, sql: &str) -> Result<Statement, ParseError> {
1440        // v6.3.1 — version-aware lookup. If the cached plan was
1441        // prepared before the most recent ANALYZE, evict and replan.
1442        let current_version = self.statistics.version();
1443        if let Some(plan) = self.plan_cache.get(sql) {
1444            if plan.statistics_version == current_version {
1445                return Ok(plan.stmt.clone());
1446            }
1447            // Stale entry — fall through to evict + re-prepare.
1448        }
1449        self.plan_cache.evict(sql);
1450        let stmt = self.prepare(sql)?;
1451        let source_tables = plan_cache::collect_source_tables(&stmt);
1452        let plan = plan_cache::PreparedPlan {
1453            stmt: stmt.clone(),
1454            statistics_version: current_version,
1455            source_tables,
1456            describe_columns: alloc::vec::Vec::new(),
1457        };
1458        self.plan_cache.insert(String::from(sql), plan);
1459        Ok(stmt)
1460    }
1461
1462    /// v6.3.0 — read-only accessor for tests and v6.3.1 invalidation.
1463    pub fn plan_cache(&self) -> &plan_cache::PlanCache {
1464        &self.plan_cache
1465    }
1466
1467    /// v6.3.0 — mutable accessor for v6.3.1 invalidation hooks.
1468    pub fn plan_cache_mut(&mut self) -> &mut plan_cache::PlanCache {
1469        &mut self.plan_cache
1470    }
1471
1472    /// v6.3.3 — Describe a prepared `Statement` without executing.
1473    /// Returns `(parameter_oids, output_columns)`. Empty
1474    /// `output_columns` means the statement has no row-producing
1475    /// shape we could resolve here (JOIN, subquery, non-SELECT, …)
1476    /// — pgwire layer maps that to a `NoData` reply.
1477    pub fn describe_prepared(&self, stmt: &Statement) -> (Vec<u32>, Vec<ColumnSchema>) {
1478        describe::describe_prepared(stmt, self.active_catalog())
1479    }
1480
1481    /// v6.1.1 — execute a [`Statement`] previously returned by
1482    /// [`Engine::prepare`], substituting `Expr::Placeholder(n)`
1483    /// nodes for the corresponding [`Value`] in `params` (1-based
1484    /// per PG: `$1` → `params[0]`). Bind-time string parameters
1485    /// are decoded into typed `Value`s by the pgwire layer before
1486    /// this call so the resulting AST hits the same execution
1487    /// path as a simple query — no SQL re-parse.
1488    ///
1489    /// Pgwire's `Execute` (E) message after a `Bind` (B) lands here.
1490    pub fn execute_prepared(
1491        &mut self,
1492        mut stmt: Statement,
1493        params: &[Value],
1494    ) -> Result<QueryResult, EngineError> {
1495        substitute_placeholders(&mut stmt, params)?;
1496        // v7.16.0 — set `current_tx` for the duration of the
1497        // dispatch so the `exec_*` helpers see the right TX
1498        // slot (matches what `execute_in_with_cancel` does for
1499        // simple-query). Pre-v7.16 the simple-query path
1500        // worked because every public entry point routed
1501        // through `execute_in_with_cancel`; the prepared path
1502        // skipped the wrap and so its INSERTs/UPDATEs landed
1503        // in the no-tx default slot, silently invisible to a
1504        // BEGIN/COMMIT-bracketed flow. Caught by spg-sqlx's
1505        // first transaction-visibility test.
1506        let saved = self.current_tx;
1507        self.current_tx = Some(IMPLICIT_TX);
1508        let result = self.execute_stmt_with_cancel(stmt, CancelToken::none());
1509        self.current_tx = saved;
1510        result
1511    }
1512
1513    fn execute_inner_with_cancel(
1514        &mut self,
1515        sql: &str,
1516        cancel: CancelToken<'_>,
1517    ) -> Result<QueryResult, EngineError> {
1518        cancel.check()?;
1519        let stmt = self.prepare(sql)?;
1520        // v6.5.1 — wrap the executor with a wall-clock window so we
1521        // can record into spg_stat_query. Skip when the engine has
1522        // no clock attached (no_std embedded callers).
1523        let start_us = self.clock.map(|f| f());
1524        let result = self.execute_stmt_with_cancel(stmt, cancel);
1525        if let (Some(t0), Ok(_)) = (start_us, &result) {
1526            let now = self.clock.map_or(t0, |f| f());
1527            let elapsed = now.saturating_sub(t0).max(0) as u64;
1528            self.query_stats.record(sql, elapsed, now as u64);
1529            // v6.5.6 — slow-query log: fire callback when elapsed
1530            // exceeds the configured floor.
1531            if let (Some(threshold), Some(logger)) =
1532                (self.slow_query_threshold_us, self.slow_query_logger)
1533                && elapsed >= threshold
1534            {
1535                logger(sql, elapsed);
1536            }
1537        }
1538        result
1539    }
1540
1541    fn execute_stmt_with_cancel(
1542        &mut self,
1543        stmt: Statement,
1544        cancel: CancelToken<'_>,
1545    ) -> Result<QueryResult, EngineError> {
1546        cancel.check()?;
1547        let result = match stmt {
1548            Statement::CreateTable(s) => self.exec_create_table(s),
1549            // v7.9.15 — CREATE EXTENSION is a no-op on SPG. Returns
1550            // CommandOk with affected=0; modified_catalog=false so
1551            // the WAL doesn't grow a useless entry. mailrs F3.
1552            Statement::CreateExtension(_) => Ok(QueryResult::CommandOk {
1553                affected: 0,
1554                modified_catalog: false,
1555            }),
1556            // v7.9.27 — DO $$ ... $$ is also a no-op (SPG has no
1557            // PL/pgSQL). mailrs H1 + pg_dump compat.
1558            Statement::DoBlock => Ok(QueryResult::CommandOk {
1559                affected: 0,
1560                modified_catalog: false,
1561            }),
1562            // v7.14.0 — empty-statement no-op for pg_dump /
1563            // mysqldump preamble lines that collapse to nothing
1564            // after comment-stripping.
1565            Statement::Empty => Ok(QueryResult::CommandOk {
1566                affected: 0,
1567                modified_catalog: false,
1568            }),
1569            Statement::DropTable { names, if_exists } => self.exec_drop_table(names, if_exists),
1570            Statement::DropIndex { name, if_exists } => self.exec_drop_index(name, if_exists),
1571            Statement::CreateIndex(s) => self.exec_create_index(s),
1572            Statement::Insert(s) => self.exec_insert(s),
1573            Statement::Update(s) => self.exec_update_cancel(&s, cancel),
1574            Statement::Delete(s) => self.exec_delete_cancel(&s, cancel),
1575            Statement::Select(s) => self.exec_select_cancel(&s, cancel),
1576            Statement::Begin => self.exec_begin(),
1577            Statement::Commit => self.exec_commit(),
1578            Statement::Rollback => self.exec_rollback(),
1579            Statement::Savepoint(name) => self.exec_savepoint(name),
1580            Statement::RollbackToSavepoint(name) => self.exec_rollback_to_savepoint(&name),
1581            Statement::ReleaseSavepoint(name) => self.exec_release_savepoint(&name),
1582            Statement::ShowTables => Ok(self.exec_show_tables()),
1583            Statement::ShowColumns(table) => self.exec_show_columns(&table),
1584            Statement::ShowUsers => Ok(self.exec_show_users()),
1585            Statement::ShowPublications => Ok(self.exec_show_publications()),
1586            Statement::ShowSubscriptions => Ok(self.exec_show_subscriptions()),
1587            Statement::CreateUser(s) => self.exec_create_user(&s),
1588            Statement::DropUser(name) => self.exec_drop_user(&name),
1589            Statement::Explain(e) => self.exec_explain(&e, cancel),
1590            Statement::AlterIndex(s) => self.exec_alter_index(s),
1591            Statement::AlterTable(s) => self.exec_alter_table(s),
1592            Statement::CreatePublication(s) => self.exec_create_publication(s),
1593            Statement::DropPublication(name) => self.exec_drop_publication(&name),
1594            Statement::CreateSubscription(s) => self.exec_create_subscription(s),
1595            Statement::DropSubscription(name) => self.exec_drop_subscription(&name),
1596            // v6.1.7 — WAIT FOR WAL POSITION needs `lag_state`,
1597            // which lives in spg-server's ServerState. The engine
1598            // surfaces a clear error; the server-layer dispatch
1599            // intercepts the SQL before it reaches the engine on
1600            // a server build, so this arm only fires for
1601            // engine-only callers (spg-embedded, lib tests).
1602            Statement::WaitForWalPosition { .. } => Err(EngineError::Unsupported(
1603                "WAIT FOR WAL POSITION must be handled by the server layer".into(),
1604            )),
1605            // v6.2.0 — ANALYZE recomputes per-column histograms.
1606            Statement::Analyze(target) => self.exec_analyze(target.as_deref()),
1607            // v6.7.3 — COMPACT COLD SEGMENTS.
1608            Statement::CompactColdSegments => self.exec_compact_cold_segments(),
1609            // v7.12.1 — SET / RESET session parameter. Engine
1610            // tracks the value in `session_params`; FTS dispatcher
1611            // reads `default_text_search_config`. Everything else
1612            // is a recorded no-op (PG dump compat).
1613            Statement::SetParameter { name, value } => {
1614                self.set_session_param(name, value);
1615                Ok(QueryResult::CommandOk {
1616                    affected: 0,
1617                    modified_catalog: false,
1618                })
1619            }
1620            // v7.14.0 — MySQL multi-assignment SET. Each pair runs
1621            // through `set_session_param` so engine-known params
1622            // (FOREIGN_KEY_CHECKS, session_replication_role, …) take
1623            // effect; unknown pairs (including `@VAR` LHS from the
1624            // mysqldump preamble) are recorded then ignored.
1625            Statement::SetParameterList(pairs) => {
1626                for (name, value) in pairs {
1627                    self.set_session_param(name, value);
1628                }
1629                Ok(QueryResult::CommandOk {
1630                    affected: 0,
1631                    modified_catalog: false,
1632                })
1633            }
1634            // v7.12.4 — CREATE FUNCTION / CREATE TRIGGER / DROP …
1635            // for the PL/pgSQL trigger surface. exec_* methods are
1636            // defined alongside the existing CREATE handlers below.
1637            Statement::CreateFunction(s) => self.exec_create_function(s),
1638            Statement::CreateTrigger(s) => self.exec_create_trigger(s),
1639            Statement::DropTrigger {
1640                name,
1641                table,
1642                if_exists,
1643            } => self.exec_drop_trigger(&name, &table, if_exists),
1644            Statement::DropFunction { name, if_exists } => {
1645                self.exec_drop_function(&name, if_exists)
1646            }
1647            Statement::ResetParameter(target) => {
1648                match target {
1649                    None => self.session_params.clear(),
1650                    Some(name) => {
1651                        self.session_params.remove(&name.to_ascii_lowercase());
1652                    }
1653                }
1654                Ok(QueryResult::CommandOk {
1655                    affected: 0,
1656                    modified_catalog: false,
1657                })
1658            }
1659        };
1660        self.enforce_row_limit(result)
1661    }
1662
1663    /// v6.1.2 — `CREATE PUBLICATION` runtime path. Duplicate names
1664    /// surface as `EngineError::Unsupported` so the existing PG-wire
1665    /// error mapping stays uniform; the message carries the name so
1666    /// operators can grep replication-log noise. Inside-transaction
1667    /// invocation is rejected (matches `CREATE USER` / `DROP USER`
1668    /// stance) — replication-catalog mutation is a connection-level
1669    /// administrative op, not a transactional one.
1670    fn exec_create_publication(
1671        &mut self,
1672        s: CreatePublicationStatement,
1673    ) -> Result<QueryResult, EngineError> {
1674        // v6.1.4 — the v6.1.2 "no DDL inside a transaction" guard
1675        // was over-cautious: it also blocked the auto-commit wrap
1676        // path (which begins an internal TX around every WAL-
1677        // logged statement). PG itself allows CREATE PUBLICATION
1678        // inside a transaction (it rolls back with the TX).
1679        self.publications
1680            .create(s.name, s.scope)
1681            .map_err(|e| EngineError::Unsupported(alloc::format!("CREATE PUBLICATION: {e:?}")))?;
1682        Ok(QueryResult::CommandOk {
1683            affected: 1,
1684            modified_catalog: true,
1685        })
1686    }
1687
1688    /// v6.1.2 — `DROP PUBLICATION` runtime path. PG-compatible silent
1689    /// no-op when the publication doesn't exist (returns `affected=0`
1690    /// in that case so the wire-level command tag distinguishes
1691    /// "dropped" from "no-op", though both succeed).
1692    fn exec_drop_publication(&mut self, name: &str) -> Result<QueryResult, EngineError> {
1693        let removed = self.publications.drop(name);
1694        Ok(QueryResult::CommandOk {
1695            affected: usize::from(removed),
1696            modified_catalog: removed,
1697        })
1698    }
1699
1700    /// v6.1.2 — read access to the publication catalog. Used by
1701    /// the v6.1.5 publisher-side WAL filter, by `SHOW PUBLICATIONS`
1702    /// (v6.1.3+), and by e2e tests that need to assert state without
1703    /// going through the wire.
1704    pub const fn publications(&self) -> &publications::Publications {
1705        &self.publications
1706    }
1707
1708    /// v6.1.4 — `CREATE SUBSCRIPTION` runtime path. Defaults
1709    /// `enabled = true` and `last_received_pos = 0` for a freshly-
1710    /// created subscription. The actual worker thread is spawned
1711    /// by spg-server once the engine returns success.
1712    fn exec_create_subscription(
1713        &mut self,
1714        s: CreateSubscriptionStatement,
1715    ) -> Result<QueryResult, EngineError> {
1716        // See exec_create_publication — the in_transaction gate
1717        // was over-cautious; the auto-commit wrap path holds an
1718        // internal TX that this check was incorrectly blocking.
1719        let sub = subscriptions::Subscription {
1720            conn_str: s.conn_str,
1721            publications: s.publications,
1722            enabled: true,
1723            last_received_pos: 0,
1724        };
1725        self.subscriptions
1726            .create(s.name, sub)
1727            .map_err(|e| EngineError::Unsupported(alloc::format!("CREATE SUBSCRIPTION: {e:?}")))?;
1728        Ok(QueryResult::CommandOk {
1729            affected: 1,
1730            modified_catalog: true,
1731        })
1732    }
1733
1734    /// v6.1.4 — `DROP SUBSCRIPTION`. Silent no-op when the name
1735    /// doesn't exist (PG-compatible). The associated worker is
1736    /// torn down by spg-server when it observes the catalog
1737    /// change at the next snapshot or via the engine's
1738    /// subscriptions accessor (the worker polls the catalog on
1739    /// reconnect; v6.1.5's filter-side will tighten this to an
1740    /// explicit signal).
1741    fn exec_drop_subscription(&mut self, name: &str) -> Result<QueryResult, EngineError> {
1742        let removed = self.subscriptions.drop(name);
1743        Ok(QueryResult::CommandOk {
1744            affected: usize::from(removed),
1745            modified_catalog: removed,
1746        })
1747    }
1748
1749    /// v6.1.4 — read access to the subscription catalog. Used by
1750    /// the subscription worker (read its own row to find its
1751    /// publications + last applied position), by SHOW SUBSCRIPTIONS,
1752    /// and by e2e tests asserting state directly.
1753    pub const fn subscriptions(&self) -> &subscriptions::Subscriptions {
1754        &self.subscriptions
1755    }
1756
1757    /// v6.1.4 — write access to `last_received_pos`. Worker
1758    /// calls this after each apply batch (under the engine's
1759    /// write-lock). Returns `false` when the subscription was
1760    /// dropped between when the worker received the record and
1761    /// when this call landed.
1762    pub fn subscription_advance(&mut self, name: &str, pos: u64) -> bool {
1763        self.subscriptions.update_last_received_pos(name, pos)
1764    }
1765
1766    /// v6.1.4 — `SHOW SUBSCRIPTIONS` row materialisation. Returns
1767    /// `(name, conn_str, publications, enabled, last_received_pos)`
1768    /// ordered by subscription name. The `publications` column is
1769    /// the comma-joined list ("p1, p2") for ergonomic SHOW output;
1770    /// callers wanting structured access read `Engine::subscriptions`.
1771    fn exec_show_subscriptions(&self) -> QueryResult {
1772        let columns = alloc::vec![
1773            ColumnSchema::new("name", DataType::Text, false),
1774            ColumnSchema::new("conn_str", DataType::Text, false),
1775            ColumnSchema::new("publications", DataType::Text, false),
1776            ColumnSchema::new("enabled", DataType::Bool, false),
1777            ColumnSchema::new("last_received_pos", DataType::BigInt, false),
1778        ];
1779        let rows: Vec<Row> = self
1780            .subscriptions
1781            .iter()
1782            .map(|(name, sub)| {
1783                Row::new(alloc::vec![
1784                    Value::Text(name.clone()),
1785                    Value::Text(sub.conn_str.clone()),
1786                    Value::Text(sub.publications.join(", ")),
1787                    Value::Bool(sub.enabled),
1788                    Value::BigInt(i64::try_from(sub.last_received_pos).unwrap_or(i64::MAX)),
1789                ])
1790            })
1791            .collect();
1792        QueryResult::Rows { columns, rows }
1793    }
1794
1795    /// v6.2.0 — materialise `spg_statistic` rows. One row per
1796    /// `(table, column)` pair tracked in `Statistics`, with
1797    /// `histogram_bounds` rendered as a `[v0, v1, ...]` string —
1798    /// the same canonical form vector literals use for round-trip.
1799    fn exec_spg_statistic(&self) -> QueryResult {
1800        let columns = alloc::vec![
1801            ColumnSchema::new("table_name", DataType::Text, false),
1802            ColumnSchema::new("column_name", DataType::Text, false),
1803            ColumnSchema::new("null_frac", DataType::Float, false),
1804            ColumnSchema::new("n_distinct", DataType::BigInt, false),
1805            ColumnSchema::new("histogram_bounds", DataType::Text, false),
1806            // v6.7.0 — appended column (v6.2.0 stability contract
1807            // allows APPEND to spg_statistic, not reorder/rename).
1808            // Reports the cached per-table cold-row count; same
1809            // value across every column row of the same table.
1810            ColumnSchema::new("cold_row_count", DataType::BigInt, false),
1811        ];
1812        let rows: Vec<Row> = self
1813            .statistics
1814            .iter()
1815            .map(|((t, c), s)| {
1816                let cold = self
1817                    .catalog
1818                    .get(t)
1819                    .map_or(0, |table| table.cold_row_count());
1820                Row::new(alloc::vec![
1821                    Value::Text(t.clone()),
1822                    Value::Text(c.clone()),
1823                    Value::Float(f64::from(s.null_frac)),
1824                    Value::BigInt(i64::try_from(s.n_distinct).unwrap_or(i64::MAX)),
1825                    Value::Text(render_histogram_bounds(&s.histogram_bounds)),
1826                    Value::BigInt(i64::try_from(cold).unwrap_or(i64::MAX)),
1827                ])
1828            })
1829            .collect();
1830        QueryResult::Rows { columns, rows }
1831    }
1832
1833    /// v6.5.0 — materialise `spg_stat_replication` rows. One row
1834    /// per subscription with `(name, conn_str, publications,
1835    /// last_received_pos, enabled)`. Surface mirrors
1836    /// `SHOW SUBSCRIPTIONS` but follows the virtual-table dispatch
1837    /// shape so it composes with SELECT clauses (WHERE, projection
1838    /// onto specific columns, etc).
1839    fn exec_spg_stat_replication(&self) -> QueryResult {
1840        let columns = alloc::vec![
1841            ColumnSchema::new("name", DataType::Text, false),
1842            ColumnSchema::new("conn_str", DataType::Text, false),
1843            ColumnSchema::new("publications", DataType::Text, false),
1844            ColumnSchema::new("last_received_pos", DataType::BigInt, false),
1845            ColumnSchema::new("enabled", DataType::Bool, false),
1846        ];
1847        let rows: Vec<Row> = self
1848            .subscriptions
1849            .iter()
1850            .map(|(name, sub)| {
1851                Row::new(alloc::vec![
1852                    Value::Text(name.clone()),
1853                    Value::Text(sub.conn_str.clone()),
1854                    Value::Text(sub.publications.join(",")),
1855                    Value::BigInt(i64::try_from(sub.last_received_pos).unwrap_or(i64::MAX)),
1856                    Value::Bool(sub.enabled),
1857                ])
1858            })
1859            .collect();
1860        QueryResult::Rows { columns, rows }
1861    }
1862
1863    /// v6.5.0 — materialise `spg_stat_segment` rows. One row per
1864    /// cold-tier segment with `(segment_id, num_rows, num_pages,
1865    /// total_bytes)`.
1866    ///
1867    /// v6.7.0 — appended `table_name` column resolves the v6.5.0
1868    /// carve-out. Walks every user table's BTree indices to find
1869    /// which table's Cold locators point at each segment. Empty
1870    /// string for orphan segments (loaded via SPG_PRELOAD_COLD_SEGMENT
1871    /// before any index registered a locator). The walk is
1872    /// O(tables × indices × keys); cached per call, not across
1873    /// calls — re-walked on every `SELECT * FROM spg_stat_segment`.
1874    fn exec_spg_stat_segment(&self) -> QueryResult {
1875        let columns = alloc::vec![
1876            ColumnSchema::new("segment_id", DataType::BigInt, false),
1877            ColumnSchema::new("table_name", DataType::Text, false),
1878            ColumnSchema::new("num_rows", DataType::BigInt, false),
1879            ColumnSchema::new("num_pages", DataType::BigInt, false),
1880            ColumnSchema::new("total_bytes", DataType::BigInt, false),
1881        ];
1882        // v6.7.0 — build a segment_id → table_name map by walking
1883        // every user table's BTree indices once. O(tables × indices
1884        // × keys) for the v6.5.0 carve-out resolution; acceptable
1885        // because spg_stat_segment is operator-facing (not on a
1886        // hot-loop path).
1887        let mut segment_owners: alloc::collections::BTreeMap<u32, String> = BTreeMap::new();
1888        for tname in self.catalog.table_names() {
1889            if is_internal_table_name(&tname) {
1890                continue;
1891            }
1892            let Some(t) = self.catalog.get(&tname) else {
1893                continue;
1894            };
1895            for idx in t.indices() {
1896                if let spg_storage::IndexKind::BTree(map) = &idx.kind {
1897                    for (_, locs) in map.iter() {
1898                        for loc in locs {
1899                            if let spg_storage::RowLocator::Cold { segment_id, .. } = loc {
1900                                segment_owners
1901                                    .entry(*segment_id)
1902                                    .or_insert_with(|| tname.clone());
1903                            }
1904                        }
1905                    }
1906                }
1907            }
1908        }
1909        let rows: Vec<Row> = self
1910            .catalog
1911            .cold_segment_ids_global()
1912            .iter()
1913            .filter_map(|&id| {
1914                let seg = self.catalog.cold_segment(id)?;
1915                let meta = seg.meta();
1916                let owner = segment_owners.get(&id).cloned().unwrap_or_default();
1917                Some(Row::new(alloc::vec![
1918                    Value::BigInt(i64::from(id)),
1919                    Value::Text(owner),
1920                    Value::BigInt(i64::try_from(meta.num_rows).unwrap_or(i64::MAX)),
1921                    Value::BigInt(i64::from(meta.num_pages)),
1922                    Value::BigInt(i64::try_from(meta.total_bytes).unwrap_or(i64::MAX)),
1923                ]))
1924            })
1925            .collect();
1926        QueryResult::Rows { columns, rows }
1927    }
1928
1929    /// v6.5.1 — materialise `spg_stat_query` rows. One row per
1930    /// distinct SQL text recorded since the engine booted, capped
1931    /// at `QUERY_STATS_MAX` (1024). Columns:
1932    ///   sql, exec_count, total_us, mean_us, max_us, last_seen_us
1933    /// mean_us = total_us / exec_count (saturating).
1934    fn exec_spg_stat_query(&self) -> QueryResult {
1935        let columns = alloc::vec![
1936            ColumnSchema::new("sql", DataType::Text, false),
1937            ColumnSchema::new("exec_count", DataType::BigInt, false),
1938            ColumnSchema::new("total_us", DataType::BigInt, false),
1939            ColumnSchema::new("mean_us", DataType::BigInt, false),
1940            ColumnSchema::new("max_us", DataType::BigInt, false),
1941            ColumnSchema::new("last_seen_us", DataType::BigInt, false),
1942        ];
1943        let rows: Vec<Row> = self
1944            .query_stats
1945            .snapshot()
1946            .into_iter()
1947            .map(|(sql, s)| {
1948                let mean = if s.exec_count == 0 {
1949                    0
1950                } else {
1951                    s.total_us / s.exec_count
1952                };
1953                Row::new(alloc::vec![
1954                    Value::Text(sql),
1955                    Value::BigInt(i64::try_from(s.exec_count).unwrap_or(i64::MAX)),
1956                    Value::BigInt(i64::try_from(s.total_us).unwrap_or(i64::MAX)),
1957                    Value::BigInt(i64::try_from(mean).unwrap_or(i64::MAX)),
1958                    Value::BigInt(i64::try_from(s.max_us).unwrap_or(i64::MAX)),
1959                    Value::BigInt(i64::try_from(s.last_seen_us).unwrap_or(i64::MAX)),
1960                ])
1961            })
1962            .collect();
1963        QueryResult::Rows { columns, rows }
1964    }
1965
1966    /// v6.5.2 — register a connection-state provider. spg-server
1967    /// calls this at startup with a function that snapshots its
1968    /// per-pgwire-connection registry. Engine reads through the
1969    /// callback on `SELECT * FROM spg_stat_activity`.
1970    #[must_use]
1971    pub const fn with_activity_provider(mut self, f: ActivityProvider) -> Self {
1972        self.activity_provider = Some(f);
1973        self
1974    }
1975
1976    /// v6.5.3 — register audit chain provider + verifier.
1977    #[must_use]
1978    pub const fn with_audit_providers(
1979        mut self,
1980        chain: AuditChainProvider,
1981        verify: AuditVerifier,
1982    ) -> Self {
1983        self.audit_chain_provider = Some(chain);
1984        self.audit_verifier = Some(verify);
1985        self
1986    }
1987
1988    /// v6.5.6 — register a slow-query log callback. `threshold_us`
1989    /// is the floor (in microseconds); only executes above the floor
1990    /// fire the callback. spg-server wires this from
1991    /// `SPG_SLOW_QUERY_THRESHOLD_MS` (default 100 ms).
1992    #[must_use]
1993    pub const fn with_slow_query_log(mut self, threshold_us: u64, logger: SlowQueryLogger) -> Self {
1994        self.slow_query_threshold_us = Some(threshold_us);
1995        self.slow_query_logger = Some(logger);
1996        self
1997    }
1998
1999    /// v6.5.6 — operator knob for plan cache cap. spg-server reads
2000    /// `SPG_PLAN_CACHE_MAX` env at startup; uses this to override
2001    /// the compile-time default of 256.
2002    pub fn set_plan_cache_max(&mut self, n: usize) {
2003        self.plan_cache.set_max_entries(n);
2004    }
2005
2006    /// v6.5.2 — materialise `spg_stat_activity` rows. Pulls a fresh
2007    /// snapshot from the registered `ActivityProvider`. Returns an
2008    /// empty result set when no provider is registered (the no_std
2009    /// embedded path with no pgwire layer).
2010    fn exec_spg_stat_activity(&self) -> QueryResult {
2011        let columns = alloc::vec![
2012            ColumnSchema::new("pid", DataType::Int, false),
2013            ColumnSchema::new("user", DataType::Text, false),
2014            ColumnSchema::new("started_at_us", DataType::BigInt, false),
2015            ColumnSchema::new("current_sql", DataType::Text, false),
2016            ColumnSchema::new("wait_event", DataType::Text, false),
2017            ColumnSchema::new("elapsed_us", DataType::BigInt, false),
2018            ColumnSchema::new("in_transaction", DataType::Bool, false),
2019        ];
2020        let rows: Vec<Row> = self
2021            .activity_provider
2022            .map(|f| f())
2023            .unwrap_or_default()
2024            .into_iter()
2025            .map(|r| {
2026                Row::new(alloc::vec![
2027                    Value::Int(i32::try_from(r.pid).unwrap_or(i32::MAX)),
2028                    Value::Text(r.user),
2029                    Value::BigInt(r.started_at_us),
2030                    Value::Text(r.current_sql),
2031                    Value::Text(r.wait_event),
2032                    Value::BigInt(r.elapsed_us),
2033                    Value::Bool(r.in_transaction),
2034                ])
2035            })
2036            .collect();
2037        QueryResult::Rows { columns, rows }
2038    }
2039
2040    /// v6.5.4 — materialise `spg_table_ddl` rows. One row per user
2041    /// table with `(table_name, ddl)`. Reconstructed from catalog
2042    /// state on demand.
2043    fn exec_spg_table_ddl(&self) -> QueryResult {
2044        let columns = alloc::vec![
2045            ColumnSchema::new("table_name", DataType::Text, false),
2046            ColumnSchema::new("ddl", DataType::Text, false),
2047        ];
2048        let rows: Vec<Row> = self
2049            .catalog
2050            .table_names()
2051            .into_iter()
2052            .filter(|n| !is_internal_table_name(n))
2053            .filter_map(|name| {
2054                let table = self.catalog.get(&name)?;
2055                let ddl = render_create_table(&name, &table.schema().columns);
2056                Some(Row::new(alloc::vec![Value::Text(name), Value::Text(ddl),]))
2057            })
2058            .collect();
2059        QueryResult::Rows { columns, rows }
2060    }
2061
2062    /// v6.5.4 — materialise `spg_role_ddl` rows. One row per user
2063    /// with `(role_name, ddl)`. Password is redacted (matches the
2064    /// `Statement::CreateUser` Display which prints `'<redacted>'`).
2065    fn exec_spg_role_ddl(&self) -> QueryResult {
2066        let columns = alloc::vec![
2067            ColumnSchema::new("role_name", DataType::Text, false),
2068            ColumnSchema::new("ddl", DataType::Text, false),
2069        ];
2070        let rows: Vec<Row> = self
2071            .users
2072            .iter()
2073            .map(|(name, rec)| {
2074                let ddl = alloc::format!(
2075                    "CREATE USER {name} WITH PASSWORD '<redacted>' ROLE '{}'",
2076                    rec.role.as_str(),
2077                );
2078                Row::new(alloc::vec![
2079                    Value::Text(String::from(name)),
2080                    Value::Text(ddl)
2081                ])
2082            })
2083            .collect();
2084        QueryResult::Rows { columns, rows }
2085    }
2086
2087    /// v6.5.4 — materialise `spg_database_ddl`: single row whose
2088    /// `ddl` column concatenates every user table's CREATE +
2089    /// every role's CREATE in deterministic catalog order. Suitable
2090    /// for piping back through `Engine::execute` to recreate a
2091    /// schema-equivalent database.
2092    fn exec_spg_database_ddl(&self) -> QueryResult {
2093        let columns = alloc::vec![ColumnSchema::new("ddl", DataType::Text, false)];
2094        let mut out = String::new();
2095        for (name, rec) in self.users.iter() {
2096            out.push_str(&alloc::format!(
2097                "CREATE USER {name} WITH PASSWORD '<redacted>' ROLE '{}';\n",
2098                rec.role.as_str(),
2099            ));
2100        }
2101        for name in self.catalog.table_names() {
2102            if is_internal_table_name(&name) {
2103                continue;
2104            }
2105            if let Some(table) = self.catalog.get(&name) {
2106                out.push_str(&render_create_table(&name, &table.schema().columns));
2107                out.push_str(";\n");
2108            }
2109        }
2110        QueryResult::Rows {
2111            columns,
2112            rows: alloc::vec![Row::new(alloc::vec![Value::Text(out)])],
2113        }
2114    }
2115
2116    /// v6.5.3 — materialise `spg_audit_chain` rows. Pulls a fresh
2117    /// snapshot from the registered provider; empty when no
2118    /// provider is set.
2119    fn exec_spg_audit_chain(&self) -> QueryResult {
2120        let columns = alloc::vec![
2121            ColumnSchema::new("seq", DataType::BigInt, false),
2122            ColumnSchema::new("ts_ms", DataType::BigInt, false),
2123            ColumnSchema::new("prev_hash", DataType::Text, false),
2124            ColumnSchema::new("entry_hash", DataType::Text, false),
2125            ColumnSchema::new("sql", DataType::Text, false),
2126        ];
2127        let rows: Vec<Row> = self
2128            .audit_chain_provider
2129            .map(|f| f())
2130            .unwrap_or_default()
2131            .into_iter()
2132            .map(|r| {
2133                Row::new(alloc::vec![
2134                    Value::BigInt(r.seq),
2135                    Value::BigInt(r.ts_ms),
2136                    Value::Text(r.prev_hash_hex),
2137                    Value::Text(r.entry_hash_hex),
2138                    Value::Text(r.sql),
2139                ])
2140            })
2141            .collect();
2142        QueryResult::Rows { columns, rows }
2143    }
2144
2145    /// v6.5.3 — materialise `spg_audit_verify` single-row result.
2146    /// `(verified_count, broken_at_seq)` — broken_at_seq is `-1`
2147    /// on a clean chain. Returns one row with both values 0 when
2148    /// no verifier is registered (no-data fallback for embedded
2149    /// callers).
2150    fn exec_spg_audit_verify(&self) -> QueryResult {
2151        let columns = alloc::vec![
2152            ColumnSchema::new("verified_count", DataType::BigInt, false),
2153            ColumnSchema::new("broken_at_seq", DataType::BigInt, false),
2154        ];
2155        let (verified, broken) = self.audit_verifier.map(|f| f()).unwrap_or((0, -1));
2156        let row = Row::new(alloc::vec![Value::BigInt(verified), Value::BigInt(broken),]);
2157        QueryResult::Rows {
2158            columns,
2159            rows: alloc::vec![row],
2160        }
2161    }
2162
2163    /// v6.5.1 — read-only accessor for tests + v6.5.6 ops resets.
2164    pub fn query_stats(&self) -> &query_stats::QueryStats {
2165        &self.query_stats
2166    }
2167
2168    /// v6.5.1 — mutable accessor (clear, etc).
2169    pub fn query_stats_mut(&mut self) -> &mut query_stats::QueryStats {
2170        &mut self.query_stats
2171    }
2172
2173    /// v6.2.0 — read access to the per-column statistics table.
2174    /// Used by the planner (v6.2.2 selectivity functions read this),
2175    /// by `SELECT * FROM spg_statistic`, and by e2e tests.
2176    pub const fn statistics(&self) -> &statistics::Statistics {
2177        &self.statistics
2178    }
2179
2180    /// v6.2.1 — return tables whose modified-row count crossed the
2181    /// auto-analyze threshold since the last ANALYZE on that table.
2182    /// The threshold is `0.1 × max(row_count, MIN_ROWS_FOR_AUTO_
2183    /// ANALYZE)` — combines PG-style fractional + absolute lower
2184    /// bound so a fresh / tiny table doesn't get hammered on every
2185    /// INSERT.
2186    ///
2187    /// Designed to be cheap: walks every user table's
2188    /// `Catalog::table_names()` + reads `statistics::modified_
2189    /// since_last_analyze()` (BTreeMap lookup). The background
2190    /// worker calls this under `engine.read()` then drops the lock
2191    /// before re-acquiring `engine.write()` for the actual ANALYZE.
2192    pub fn tables_needing_analyze(&self) -> Vec<String> {
2193        const MIN_ROWS: u64 = 100;
2194        let mut out = Vec::new();
2195        for name in self.catalog.table_names() {
2196            if is_internal_table_name(&name) {
2197                continue;
2198            }
2199            let Some(table) = self.catalog.get(&name) else {
2200                continue;
2201            };
2202            let row_count = table.rows().len() as u64;
2203            let modified = self.statistics.modified_since_last_analyze(&name);
2204            // Threshold: ceil(0.1 × max(row_count, MIN_ROWS)),
2205            // computed in integer arithmetic so spg-engine stays
2206            // no_std without pulling in libm. `(n + 9) / 10` is
2207            // `ceil(n / 10)` for non-negative `n`.
2208            let base = row_count.max(MIN_ROWS);
2209            let threshold = base.saturating_add(9) / 10;
2210            if modified >= threshold {
2211                out.push(name);
2212            }
2213        }
2214        out
2215    }
2216
2217    /// v6.2.0 — `ANALYZE [<table>]` runtime. Bare `ANALYZE` walks
2218    /// every user table; `ANALYZE <name>` re-stats one. For each
2219    /// target table, single-pass scan + per-column histogram +
2220    /// `null_frac` + `n_distinct`. Replaces the table's prior
2221    /// stats; resets the modified-row counter.
2222    ///
2223    /// v6.2.0 doesn't sample — it scans the full table. v6.2.x
2224    /// can add reservoir sampling at the > 100 K-row mark; not a
2225    /// scope blocker for the current commit since rows ≤ 100 K
2226    /// analyse in milliseconds.
2227    fn exec_analyze(&mut self, target: Option<&str>) -> Result<QueryResult, EngineError> {
2228        let names: Vec<String> = if let Some(name) = target {
2229            // Verify the table exists; surface a clear error if not.
2230            if self.catalog.get(name).is_none() {
2231                return Err(EngineError::Storage(StorageError::TableNotFound {
2232                    name: name.to_string(),
2233                }));
2234            }
2235            alloc::vec![name.to_string()]
2236        } else {
2237            self.catalog
2238                .table_names()
2239                .into_iter()
2240                .filter(|n| !is_internal_table_name(n))
2241                .collect()
2242        };
2243        let mut analysed = 0usize;
2244        for table_name in &names {
2245            self.analyze_one_table(table_name)?;
2246            analysed += 1;
2247        }
2248        // v6.3.1 — plan cache invalidation. Bump stats version so
2249        // future lookups see the new generation, and selectively
2250        // evict every plan whose `source_tables` overlap with the
2251        // ANALYZE target set. Bare ANALYZE (all tables) clears the
2252        // whole cache.
2253        if analysed > 0 {
2254            self.statistics.bump_version();
2255            if target.is_some() {
2256                for t in &names {
2257                    self.plan_cache.evict_referencing(t);
2258                }
2259            } else {
2260                self.plan_cache.clear();
2261            }
2262        }
2263        Ok(QueryResult::CommandOk {
2264            affected: analysed,
2265            modified_catalog: true,
2266        })
2267    }
2268
2269    /// v6.7.3 — `COMPACT COLD SEGMENTS` runtime path. Drives the
2270    /// engine-layer compaction shim with the default
2271    /// 4 MiB segment-size threshold. spg-server intercepts the
2272    /// SQL before it reaches the engine on a server build —
2273    /// it reads `SPG_COMPACTION_TARGET_SEGMENT_BYTES`, calls
2274    /// `Engine::compact_cold_segments_with_target` directly with
2275    /// the env value, and persists every merged segment to
2276    /// v7.12.1 — record a `SET <name> = <value>` parameter. Names
2277    /// are case-folded to lowercase to match PG; values keep their
2278    /// caller-supplied form so observability paths see what was
2279    /// requested. Only `default_text_search_config` is consulted by
2280    /// the engine today.
2281    fn set_session_param(&mut self, name: String, value: spg_sql::ast::SetValue) {
2282        let normalised = match value {
2283            spg_sql::ast::SetValue::String(s) => s,
2284            spg_sql::ast::SetValue::Ident(s) => s,
2285            spg_sql::ast::SetValue::Number(s) => s,
2286            spg_sql::ast::SetValue::Default => String::new(),
2287        };
2288        let key = name.to_ascii_lowercase();
2289        // v7.14.0 — mysqldump preamble emits
2290        // `SET FOREIGN_KEY_CHECKS=0` so it can CREATE TABLE in any
2291        // order despite cross-table FK references; the closing
2292        // section emits `SET FOREIGN_KEY_CHECKS=1` (or
2293        // `=@OLD_FOREIGN_KEY_CHECKS` which resolves to "ON" in our
2294        // session-variable-aware path). Match both shapes.
2295        // Also accept PG's `session_replication_role = 'replica'`
2296        // which suppresses trigger + FK enforcement during a
2297        // logical replication apply (pg_dump preserves this for
2298        // schema-only mode but it shows up in some restores).
2299        let value_off = matches!(
2300            normalised.to_ascii_lowercase().as_str(),
2301            "0" | "off" | "false"
2302        );
2303        let value_on = matches!(
2304            normalised.to_ascii_lowercase().as_str(),
2305            "1" | "on" | "true"
2306        );
2307        if key == "foreign_key_checks"
2308            || key == "session_replication_role" && normalised.eq_ignore_ascii_case("replica")
2309        {
2310            if value_off || key == "session_replication_role" {
2311                self.foreign_key_checks = false;
2312            } else if value_on
2313                || (key == "session_replication_role"
2314                    && normalised.eq_ignore_ascii_case("origin"))
2315            {
2316                self.foreign_key_checks = true;
2317                // Drain pending FK queue against the now-complete
2318                // catalog. Errors here surface as the SET reply —
2319                // caller knows enabling checks revealed orphans.
2320                let _ = self.drain_pending_foreign_keys();
2321            }
2322        }
2323        self.session_params.insert(key, normalised);
2324    }
2325
2326    /// v7.14.0 — resolve every queued FK whose installation was
2327    /// deferred (`SET FOREIGN_KEY_CHECKS=0` window). Called by
2328    /// `set_session_param` when checks flip back on and by the
2329    /// drop-import release gate. Each FK is resolved against the
2330    /// current catalog; remaining missing-parent errors propagate
2331    /// up so the caller knows the import was incomplete.
2332    fn drain_pending_foreign_keys(&mut self) -> Result<(), EngineError> {
2333        let pending = core::mem::take(&mut self.pending_foreign_keys);
2334        for (child, fk) in pending {
2335            // Resolve against the current catalog. Skip silently
2336            // when the child table itself was dropped between
2337            // queue + drain.
2338            let cols_snapshot = match self.active_catalog().get(&child) {
2339                Some(t) => t.schema().columns.clone(),
2340                None => continue,
2341            };
2342            let storage_fk = resolve_foreign_key(&child, &cols_snapshot, fk, self.active_catalog())?;
2343            let table = self
2344                .active_catalog_mut()
2345                .get_mut(&child)
2346                .expect("checked above");
2347            table.schema_mut().foreign_keys.push(storage_fk);
2348        }
2349        Ok(())
2350    }
2351
2352    /// v7.12.1 — read a session parameter set via `SET`. Used by
2353    /// the FTS function dispatcher to resolve the default config
2354    /// for `to_tsvector(text)` / `plainto_tsquery(text)` etc.
2355    #[must_use]
2356    pub fn session_param(&self, name: &str) -> Option<&str> {
2357        self.session_params
2358            .get(&name.to_ascii_lowercase())
2359            .map(String::as_str)
2360    }
2361
2362    /// v7.12.1 — build an `EvalContext` chained with the session's
2363    /// `default_text_search_config`. Engine-internal callers use
2364    /// this instead of `EvalContext::new` so the FTS function
2365    /// dispatcher sees the SET configuration.
2366    fn ev_ctx<'a>(
2367        &'a self,
2368        columns: &'a [ColumnSchema],
2369        alias: Option<&'a str>,
2370    ) -> EvalContext<'a> {
2371        EvalContext::new(columns, alias)
2372            .with_default_text_search_config(self.session_param("default_text_search_config"))
2373    }
2374
2375    /// `<db>.spg/segments/`. This arm only fires for engine-only
2376    /// callers (spg-embedded, lib tests); in that mode merged
2377    /// segments live in memory and are dropped at process exit.
2378    fn exec_compact_cold_segments(&mut self) -> Result<QueryResult, EngineError> {
2379        let target = COMPACTION_TARGET_DEFAULT_BYTES;
2380        let reports = self.compact_cold_segments_with_target(target)?;
2381        let columns = alloc::vec![
2382            ColumnSchema::new("table_name", DataType::Text, false),
2383            ColumnSchema::new("index_name", DataType::Text, false),
2384            ColumnSchema::new("sources_merged", DataType::BigInt, false),
2385            ColumnSchema::new("merged_segment_id", DataType::BigInt, false),
2386            ColumnSchema::new("merged_rows", DataType::BigInt, false),
2387            ColumnSchema::new("deleted_rows_pruned", DataType::BigInt, false),
2388            ColumnSchema::new("bytes_reclaimed_estimate", DataType::BigInt, false),
2389        ];
2390        let rows: Vec<Row> = reports
2391            .into_iter()
2392            .map(|(tname, iname, report)| {
2393                Row::new(alloc::vec![
2394                    Value::Text(tname),
2395                    Value::Text(iname),
2396                    Value::BigInt(i64::try_from(report.sources.len()).unwrap_or(i64::MAX)),
2397                    Value::BigInt(i64::from(report.merged_segment_id.unwrap_or(0))),
2398                    Value::BigInt(i64::try_from(report.merged_rows).unwrap_or(i64::MAX)),
2399                    Value::BigInt(i64::try_from(report.deleted_rows_pruned).unwrap_or(i64::MAX),),
2400                    Value::BigInt(
2401                        i64::try_from(report.bytes_reclaimed_estimate).unwrap_or(i64::MAX),
2402                    ),
2403                ])
2404            })
2405            .collect();
2406        Ok(QueryResult::Rows { columns, rows })
2407    }
2408
2409    /// Walk a single table's rows once and (re-)populate per-column
2410    /// stats. Drops the existing stats for `table` first so columns
2411    /// that have been DROP-ed between ANALYZEs don't leave stale
2412    /// rows.
2413    fn analyze_one_table(&mut self, table_name: &str) -> Result<(), EngineError> {
2414        let table = self.catalog.get(table_name).ok_or_else(|| {
2415            EngineError::Storage(StorageError::TableNotFound {
2416                name: table_name.to_string(),
2417            })
2418        })?;
2419        let schema = table.schema().clone();
2420        let row_count = table.rows().len();
2421        // For each column, collect (sorted) non-NULL textual values
2422        // + count NULLs; then ask `statistics::build_histogram` to
2423        // produce the 101 bounds and `estimate_n_distinct` the
2424        // distinct count.
2425        self.statistics.clear_table(table_name);
2426        for (col_pos, col_schema) in schema.columns.iter().enumerate() {
2427            // v6.2.0 skip: vector columns have their own stats
2428            // shape (HNSW graph topology). v6.2 deliberation #1.
2429            if matches!(col_schema.ty, DataType::Vector { .. }) {
2430                continue;
2431            }
2432            let mut non_null_values: Vec<Value> = Vec::with_capacity(row_count);
2433            let mut nulls: u64 = 0;
2434            for row in table.rows() {
2435                match row.values.get(col_pos) {
2436                    Some(Value::Null) | None => nulls += 1,
2437                    Some(v) => non_null_values.push(v.clone()),
2438                }
2439            }
2440            // Sort by type-aware ordering (Int as int, Text as
2441            // lex, etc.) so histogram bounds reflect the column's
2442            // natural order — not lexicographic on the string
2443            // representation, which would put "9" after "49".
2444            non_null_values.sort_by(|a, b| sort_values_for_histogram(a, b));
2445            let non_null: Vec<String> = non_null_values.iter().map(canonical_value_repr).collect();
2446            let null_frac = if row_count == 0 {
2447                0.0
2448            } else {
2449                #[allow(clippy::cast_precision_loss)]
2450                let f = nulls as f32 / row_count as f32;
2451                f
2452            };
2453            let n_distinct = statistics::estimate_n_distinct(&non_null);
2454            let histogram_bounds = statistics::build_histogram(&non_null);
2455            self.statistics.set(
2456                table_name.to_string(),
2457                col_schema.name.clone(),
2458                statistics::ColumnStats {
2459                    null_frac,
2460                    n_distinct,
2461                    histogram_bounds,
2462                },
2463            );
2464        }
2465        self.statistics.reset_modified(table_name);
2466        // v6.7.0 — refresh the per-table cold_rows cache. Walk the
2467        // BTree indices and count Cold locators (MAX across
2468        // indices); store the result on the table. Surfaced via
2469        // `spg_statistic.cold_row_count` (new column) and
2470        // `spg_stat_segment.table_name` (new column).
2471        let cold_count = {
2472            let table = self
2473                .active_catalog()
2474                .get(table_name)
2475                .expect("table still present");
2476            table.count_cold_locators()
2477        };
2478        let table_mut = self
2479            .active_catalog_mut()
2480            .get_mut(table_name)
2481            .expect("table still present");
2482        table_mut.set_cold_row_count(cold_count);
2483        Ok(())
2484    }
2485
2486    /// v6.1.3 — `SHOW PUBLICATIONS` row materialisation. Returns
2487    /// `(name, scope, table_count)` ordered by publication name.
2488    ///   - `scope` is the human-readable string:
2489    ///       `"FOR ALL TABLES"` /
2490    ///       `"FOR TABLE t1, t2"` /
2491    ///       `"FOR ALL TABLES EXCEPT t1, t2"`.
2492    ///   - `table_count` is NULL for `AllTables`, the list length
2493    ///     otherwise. NULLability lets clients distinguish "publish
2494    ///     everything" from "publish exactly 0 tables" (the v6.1.3
2495    ///     parser forbids the empty list, but the column shape is
2496    ///     ready for the v6.1.5 publisher-side semantics).
2497    fn exec_show_publications(&self) -> QueryResult {
2498        let columns = alloc::vec![
2499            ColumnSchema::new("name", DataType::Text, false),
2500            ColumnSchema::new("scope", DataType::Text, false),
2501            ColumnSchema::new("table_count", DataType::Int, true),
2502        ];
2503        let rows: Vec<Row> = self
2504            .publications
2505            .iter()
2506            .map(|(name, scope)| {
2507                let (scope_str, count_val) = match scope {
2508                    spg_sql::ast::PublicationScope::AllTables => {
2509                        ("FOR ALL TABLES".to_string(), Value::Null)
2510                    }
2511                    spg_sql::ast::PublicationScope::ForTables(ts) => (
2512                        alloc::format!("FOR TABLE {}", ts.join(", ")),
2513                        Value::Int(i32::try_from(ts.len()).unwrap_or(i32::MAX)),
2514                    ),
2515                    spg_sql::ast::PublicationScope::AllTablesExcept(ts) => (
2516                        alloc::format!("FOR ALL TABLES EXCEPT {}", ts.join(", ")),
2517                        Value::Int(i32::try_from(ts.len()).unwrap_or(i32::MAX)),
2518                    ),
2519                };
2520                Row::new(alloc::vec![
2521                    Value::Text(name.clone()),
2522                    Value::Text(scope_str),
2523                    count_val,
2524                ])
2525            })
2526            .collect();
2527        QueryResult::Rows { columns, rows }
2528    }
2529
2530    /// v4.1 `SHOW USERS` — `(name, role)` per row, ordered by name.
2531    fn exec_show_users(&self) -> QueryResult {
2532        let columns = alloc::vec![
2533            ColumnSchema::new("name", DataType::Text, false),
2534            ColumnSchema::new("role", DataType::Text, false),
2535        ];
2536        let rows: Vec<Row> = self
2537            .users
2538            .iter()
2539            .map(|(name, rec)| {
2540                Row::new(alloc::vec![
2541                    Value::Text(name.to_string()),
2542                    Value::Text(rec.role.as_str().to_string()),
2543                ])
2544            })
2545            .collect();
2546        QueryResult::Rows { columns, rows }
2547    }
2548
2549    fn exec_create_user(&mut self, s: &CreateUserStatement) -> Result<QueryResult, EngineError> {
2550        if self.in_transaction() {
2551            return Err(EngineError::Unsupported(
2552                "CREATE USER is not allowed inside a transaction".into(),
2553            ));
2554        }
2555        let role = users::Role::parse(&s.role).ok_or_else(|| {
2556            EngineError::Unsupported(alloc::format!("invalid role: {:?}", s.role))
2557        })?;
2558        // Prefer the host-injected RNG. Falls back to a deterministic
2559        // salt derived from the username only when no RNG is wired —
2560        // acceptable for tests; the server always installs one.
2561        let salt = self.salt_fn.map_or_else(
2562            || {
2563                let mut s_bytes = [0u8; 16];
2564                let digest = spg_crypto::hash(s.name.as_bytes());
2565                s_bytes.copy_from_slice(&digest[..16]);
2566                s_bytes
2567            },
2568            |f| f(),
2569        );
2570        self.users
2571            .create(&s.name, &s.password, role, salt)
2572            .map_err(|e| EngineError::Unsupported(alloc::format!("CREATE USER: {e}")))?;
2573        Ok(QueryResult::CommandOk {
2574            affected: 1,
2575            modified_catalog: true,
2576        })
2577    }
2578
2579    fn exec_drop_user(&mut self, name: &str) -> Result<QueryResult, EngineError> {
2580        if self.in_transaction() {
2581            return Err(EngineError::Unsupported(
2582                "DROP USER is not allowed inside a transaction".into(),
2583            ));
2584        }
2585        self.users
2586            .drop(name)
2587            .map_err(|e| EngineError::Unsupported(alloc::format!("DROP USER: {e}")))?;
2588        Ok(QueryResult::CommandOk {
2589            affected: 1,
2590            modified_catalog: true,
2591        })
2592    }
2593
2594    /// v7.12.4 — `CREATE [OR REPLACE] FUNCTION`. Stores the
2595    /// function metadata in the catalog. PL/pgSQL bodies are
2596    /// already parsed by the SQL parser; we re-canonicalise the
2597    /// body to source text for storage (the executor re-parses
2598    /// it at trigger fire time — see the trigger fire path).
2599    fn exec_create_function(
2600        &mut self,
2601        s: spg_sql::ast::CreateFunctionStatement,
2602    ) -> Result<QueryResult, EngineError> {
2603        let args_repr = render_function_args(&s.args);
2604        let returns = match &s.returns {
2605            spg_sql::ast::FunctionReturn::Trigger => alloc::string::String::from("TRIGGER"),
2606            spg_sql::ast::FunctionReturn::Void => alloc::string::String::from("VOID"),
2607            spg_sql::ast::FunctionReturn::Type(t) => alloc::format!("{t}"),
2608            spg_sql::ast::FunctionReturn::Other(s) => s.clone(),
2609        };
2610        let body_text = match &s.body {
2611            spg_sql::ast::FunctionBody::PlPgSql(b) => alloc::format!("{b}"),
2612            spg_sql::ast::FunctionBody::Raw(s) => s.clone(),
2613        };
2614        let def = spg_storage::FunctionDef {
2615            name: s.name.clone(),
2616            args_repr,
2617            returns,
2618            language: s.language.clone(),
2619            body: body_text,
2620        };
2621        self.active_catalog_mut()
2622            .create_function(def, s.or_replace)
2623            .map_err(EngineError::Storage)?;
2624        Ok(QueryResult::CommandOk {
2625            affected: 0,
2626            modified_catalog: true,
2627        })
2628    }
2629
2630    /// v7.12.4 — `CREATE [OR REPLACE] TRIGGER`. The referenced
2631    /// function must already exist in the catalog (forward
2632    /// references defer to a later release). Persists the
2633    /// trigger metadata for the row-write hooks below to consult.
2634    fn exec_create_trigger(
2635        &mut self,
2636        s: spg_sql::ast::CreateTriggerStatement,
2637    ) -> Result<QueryResult, EngineError> {
2638        let timing = match s.timing {
2639            spg_sql::ast::TriggerTiming::Before => "BEFORE",
2640            spg_sql::ast::TriggerTiming::After => "AFTER",
2641            spg_sql::ast::TriggerTiming::InsteadOf => "INSTEAD OF",
2642        };
2643        let events: Vec<alloc::string::String> = s
2644            .events
2645            .iter()
2646            .map(|e| match e {
2647                spg_sql::ast::TriggerEvent::Insert => alloc::string::String::from("INSERT"),
2648                spg_sql::ast::TriggerEvent::Update => alloc::string::String::from("UPDATE"),
2649                spg_sql::ast::TriggerEvent::Delete => alloc::string::String::from("DELETE"),
2650                spg_sql::ast::TriggerEvent::Truncate => alloc::string::String::from("TRUNCATE"),
2651            })
2652            .collect();
2653        let for_each = match s.for_each {
2654            spg_sql::ast::TriggerForEach::Row => "ROW",
2655            spg_sql::ast::TriggerForEach::Statement => "STATEMENT",
2656        };
2657        let def = spg_storage::TriggerDef {
2658            name: s.name.clone(),
2659            table: s.table.clone(),
2660            timing: alloc::string::String::from(timing),
2661            events,
2662            for_each: alloc::string::String::from(for_each),
2663            function: s.function.clone(),
2664            update_columns: s.update_columns.clone(),
2665            // v7.16.1 — every trigger is born enabled. Toggled
2666            // by ALTER TABLE … { ENABLE | DISABLE } TRIGGER.
2667            enabled: true,
2668        };
2669        self.active_catalog_mut()
2670            .create_trigger(def, s.or_replace)
2671            .map_err(EngineError::Storage)?;
2672        Ok(QueryResult::CommandOk {
2673            affected: 0,
2674            modified_catalog: true,
2675        })
2676    }
2677
2678    fn exec_drop_trigger(
2679        &mut self,
2680        name: &str,
2681        table: &str,
2682        if_exists: bool,
2683    ) -> Result<QueryResult, EngineError> {
2684        let removed = self.active_catalog_mut().drop_trigger(name, table);
2685        if !removed && !if_exists {
2686            return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
2687                alloc::format!("trigger {name:?} on {table:?} does not exist"),
2688            )));
2689        }
2690        Ok(QueryResult::CommandOk {
2691            affected: usize::from(removed),
2692            modified_catalog: removed,
2693        })
2694    }
2695
2696    fn exec_drop_function(
2697        &mut self,
2698        name: &str,
2699        if_exists: bool,
2700    ) -> Result<QueryResult, EngineError> {
2701        let removed = self.active_catalog_mut().drop_function(name);
2702        if !removed && !if_exists {
2703            return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
2704                alloc::format!("function {name:?} does not exist"),
2705            )));
2706        }
2707        Ok(QueryResult::CommandOk {
2708            affected: usize::from(removed),
2709            modified_catalog: removed,
2710        })
2711    }
2712
2713    /// v4.4 `UPDATE <table> SET col = expr [, ...] [WHERE cond]`.
2714    /// Filter pass uses the same WHERE eval as `exec_select`. Per
2715    /// matched row, evaluate each RHS expression against the *old*
2716    /// row, then call `Table::update_row` which rebuilds indices.
2717    /// Indexed columns are correctly reflected because rebuild
2718    /// happens after the cell rewrite.
2719    fn exec_update_cancel(
2720        &mut self,
2721        stmt: &spg_sql::ast::UpdateStatement,
2722        cancel: CancelToken<'_>,
2723    ) -> Result<QueryResult, EngineError> {
2724        // v7.12.5 — snapshot BEFORE/AFTER UPDATE row triggers + the
2725        // session FTS config before the table mut-borrow opens (the
2726        // INSERT path uses the same pattern). Empty vecs are the
2727        // common "no triggers on this table" fast path.
2728        // v7.13.0 — UPDATE triggers carry an optional `UPDATE OF
2729        // cols` filter. The filter is paired with each function so
2730        // the per-row fire loop can skip when no listed column
2731        // actually differs between OLD and NEW.
2732        let before_update_triggers = self.snapshot_update_row_triggers(&stmt.table, "BEFORE");
2733        let after_update_triggers = self.snapshot_update_row_triggers(&stmt.table, "AFTER");
2734        let trigger_session_cfg: Option<String> = self
2735            .session_params
2736            .get("default_text_search_config")
2737            .cloned();
2738        // v5.2.3: if the WHERE is a PK equality and matches a cold-
2739        // tier row, promote it back to the hot tier *before* the
2740        // hot-row walk. The promote pushes the row to the end of
2741        // `table.rows`, where the upcoming SET-evaluation loop will
2742        // pick it up and apply the assignments. Lookups for the key
2743        // never observe a gap because `promote_cold_row` inserts the
2744        // hot row before retiring the cold locator.
2745        if let Some(w) = &stmt.where_ {
2746            let schema_cols = self
2747                .active_catalog()
2748                .get(&stmt.table)
2749                .ok_or_else(|| {
2750                    EngineError::Storage(StorageError::TableNotFound {
2751                        name: stmt.table.clone(),
2752                    })
2753                })?
2754                .schema()
2755                .columns
2756                .clone();
2757            if let Some((col_pos, key)) = try_pk_predicate(w, &schema_cols, stmt.table.as_str())
2758                && let Some(idx_name) = self
2759                    .active_catalog()
2760                    .get(&stmt.table)
2761                    .and_then(|t| t.index_on(col_pos).map(|i| i.name.clone()))
2762            {
2763                // Promote may be a no-op (key is hot-only or absent);
2764                // we don't care about the return value here — the
2765                // subsequent hot walk will either match or not.
2766                let _ = self
2767                    .active_catalog_mut()
2768                    .promote_cold_row(&stmt.table, &idx_name, &key);
2769            }
2770        }
2771
2772        // v7.12.1 — cache session FTS config before the table
2773        // mut-borrow (same reason as exec_delete).
2774        let ts_cfg: Option<String> = self
2775            .session_param("default_text_search_config")
2776            .map(String::from);
2777        let table = self
2778            .active_catalog_mut()
2779            .get_mut(&stmt.table)
2780            .ok_or_else(|| {
2781                EngineError::Storage(StorageError::TableNotFound {
2782                    name: stmt.table.clone(),
2783                })
2784            })?;
2785        let schema_cols: Vec<ColumnSchema> = table.schema().columns.clone();
2786        // Resolve each SET target to a column position once, validate
2787        // up front so a typo'd column doesn't leave a partial mutation
2788        // behind.
2789        let mut targets: Vec<(usize, &Expr)> = Vec::with_capacity(stmt.assignments.len());
2790        for (col, expr) in &stmt.assignments {
2791            let pos = schema_cols
2792                .iter()
2793                .position(|c| c.name == *col)
2794                .ok_or_else(|| {
2795                    EngineError::Eval(EvalError::ColumnNotFound { name: col.clone() })
2796                })?;
2797            targets.push((pos, expr));
2798        }
2799        let ctx = EvalContext::new(&schema_cols, Some(stmt.table.as_str()))
2800            .with_default_text_search_config(ts_cfg.as_deref());
2801        // Walk every row, evaluate WHERE then SET expressions. We
2802        // gather (position, new_values) tuples first and apply them
2803        // afterwards so the WHERE/RHS evaluation reads the original
2804        // row state — matches PG semantics (UPDATE doesn't see its
2805        // own writes).
2806        let mut planned: Vec<(usize, Vec<Value>)> = Vec::new();
2807        for (i, row) in table.rows().iter().enumerate() {
2808            // v4.5: cooperative cancel checkpoint every 256 rows so
2809            // a runaway UPDATE without WHERE doesn't drag past the
2810            // server's query-timeout watchdog.
2811            if i.is_multiple_of(256) {
2812                cancel.check()?;
2813            }
2814            if let Some(w) = &stmt.where_ {
2815                let cond = eval::eval_expr(w, row, &ctx)?;
2816                if !matches!(cond, Value::Bool(true)) {
2817                    continue;
2818                }
2819            }
2820            let mut new_vals = row.values.clone();
2821            for (pos, expr) in &targets {
2822                let v = eval::eval_expr(expr, row, &ctx)?;
2823                new_vals[*pos] =
2824                    coerce_value(v, schema_cols[*pos].ty, &schema_cols[*pos].name, *pos)?;
2825            }
2826            planned.push((i, new_vals));
2827        }
2828        // v7.6.6 — capture pre-update row values for the FK
2829        // enforcement passes below. `planned` carries new values
2830        // only; pair them with the old row.
2831        let plan_with_old: Vec<(usize, Vec<Value>, Vec<Value>)> = planned
2832            .iter()
2833            .map(|(pos, new_vals)| (*pos, table.rows()[*pos].values.clone(), new_vals.clone()))
2834            .collect();
2835        let self_fks = table.schema().foreign_keys.clone();
2836        // v7.12.5 — `affected` is computed post-BEFORE-trigger
2837        // below (triggers may RETURN NULL to skip individual
2838        // rows). The pre-trigger len shape is no longer accurate.
2839        // Release mutable borrow on `table` for the FK passes.
2840        let _ = table;
2841        // v7.6.6 — Stage 2a: outbound FK check. For every row whose
2842        // local FK columns changed, the new value must exist in the
2843        // parent.
2844        if !self_fks.is_empty() {
2845            let new_rows: Vec<Vec<Value>> = planned
2846                .iter()
2847                .map(|(_pos, new_vals)| new_vals.clone())
2848                .collect();
2849            enforce_fk_inserts(self.active_catalog(), &stmt.table, &self_fks, &new_rows)?;
2850        }
2851        // v7.13.0 — CHECK constraint enforcement on UPDATE
2852        // (mailrs round-5 G3). Predicates evaluated against the
2853        // candidate post-UPDATE row; false rejects the UPDATE.
2854        {
2855            let new_rows: Vec<Vec<Value>> = planned
2856                .iter()
2857                .map(|(_pos, new_vals)| new_vals.clone())
2858                .collect();
2859            enforce_check_constraints(self.active_catalog(), &stmt.table, &new_rows)?;
2860        }
2861        // v7.6.6 — Stage 2b: inbound FK check. For every row that
2862        // changed value in a column that *some other table* uses as
2863        // a FK parent column, react per `on_update` action.
2864        let child_plan =
2865            plan_fk_parent_updates(self.active_catalog(), &stmt.table, &plan_with_old)?;
2866        // Stage 3a — apply each child-side action.
2867        for step in &child_plan {
2868            apply_fk_child_step(self.active_catalog_mut(), step)?;
2869        }
2870        // Stage 3b — apply the original UPDATE.
2871        let table = self
2872            .active_catalog_mut()
2873            .get_mut(&stmt.table)
2874            .ok_or_else(|| {
2875                EngineError::Storage(StorageError::TableNotFound {
2876                    name: stmt.table.clone(),
2877                })
2878            })?;
2879        // v7.12.5 — fire BEFORE/AFTER UPDATE row-level triggers
2880        // around the apply loop. BEFORE sees NEW=candidate +
2881        // OLD=current; may rewrite NEW or RETURN NULL to skip.
2882        // AFTER sees NEW=post-write + OLD=pre-write (both read-
2883        // only).
2884        //
2885        // Filter `planned` through the BEFORE pass first so the
2886        // RETURNING snapshot reflects what actually got written
2887        // (triggers may rewrite cells, including a cancellation).
2888        let mut applied_after_before: Vec<(usize, Row, Row)> = Vec::with_capacity(planned.len());
2889        // v7.12.7 — embedded SQL queue.
2890        let mut deferred_embedded: Vec<triggers::DeferredEmbeddedStmt> = Vec::new();
2891        for (pos, new_vals) in &planned {
2892            let old_row = table.rows()[*pos].clone();
2893            let mut new_row = Row::new(new_vals.clone());
2894            let mut skip = false;
2895            for (fd, filter) in &before_update_triggers {
2896                // v7.13.0 — `UPDATE OF cols` filter (mailrs round-5
2897                // G7). Skip this trigger when the filter is set and
2898                // no listed column actually differs between OLD and
2899                // NEW for this row.
2900                if !filter.is_empty()
2901                    && !any_column_changed(filter, &schema_cols, &old_row, &new_row)
2902                {
2903                    continue;
2904                }
2905                let (outcome, deferred) = triggers::fire_row_trigger(
2906                    fd,
2907                    Some(new_row.clone()),
2908                    Some(&old_row),
2909                    &stmt.table,
2910                    &schema_cols,
2911                    &[],
2912                    trigger_session_cfg.as_deref(),
2913                    false,
2914                )
2915                .map_err(|e| EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}"))))?;
2916                deferred_embedded.extend(deferred);
2917                match outcome {
2918                    triggers::TriggerOutcome::Row(r) => new_row = r,
2919                    triggers::TriggerOutcome::Skip => {
2920                        skip = true;
2921                        break;
2922                    }
2923                }
2924            }
2925            if !skip {
2926                applied_after_before.push((*pos, new_row, old_row));
2927            }
2928        }
2929        // v7.9.4 — snapshot post-update values for RETURNING (post-
2930        // BEFORE-trigger because triggers can rewrite cells).
2931        let updated_for_returning: Vec<Vec<Value>> = if stmt.returning.is_some() {
2932            applied_after_before
2933                .iter()
2934                .map(|(_pos, new_row, _old)| new_row.values.clone())
2935                .collect()
2936        } else {
2937            Vec::new()
2938        };
2939        let affected = applied_after_before.len();
2940        // Apply, then fire AFTER triggers per row. AFTER runs read-
2941        // only against the freshly-written row; v7.12.4-shape
2942        // assignment errors with a clear message.
2943        for (pos, new_row, old_row) in applied_after_before {
2944            table.update_row(pos, new_row.values.clone())?;
2945            for (fd, filter) in &after_update_triggers {
2946                if !filter.is_empty()
2947                    && !any_column_changed(filter, &schema_cols, &old_row, &new_row)
2948                {
2949                    continue;
2950                }
2951                let (_outcome, deferred) = triggers::fire_row_trigger(
2952                    fd,
2953                    Some(new_row.clone()),
2954                    Some(&old_row),
2955                    &stmt.table,
2956                    &schema_cols,
2957                    &[],
2958                    trigger_session_cfg.as_deref(),
2959                    true,
2960                )
2961                .map_err(|e| EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}"))))?;
2962                deferred_embedded.extend(deferred);
2963            }
2964        }
2965        let _ = table;
2966        // v7.12.7 — drain trigger-emitted embedded SQL for this UPDATE.
2967        self.execute_deferred_trigger_stmts(deferred_embedded, cancel)?;
2968        // v6.2.1 — auto-analyze modified-row tracking for UPDATE.
2969        if !self.in_transaction() && affected > 0 {
2970            self.statistics
2971                .record_modifications(&stmt.table, affected as u64);
2972        }
2973        // v7.9.4 — RETURNING projection.
2974        if let Some(items) = &stmt.returning {
2975            return self.build_returning_rows(&stmt.table, items, updated_for_returning);
2976        }
2977        Ok(QueryResult::CommandOk {
2978            affected,
2979            modified_catalog: !self.in_transaction(),
2980        })
2981    }
2982
2983    /// v4.4 `DELETE FROM <table> [WHERE cond]`. Collects matching
2984    /// positions then delegates to `Table::delete_rows` (single index
2985    /// rebuild for the batch).
2986    fn exec_delete_cancel(
2987        &mut self,
2988        stmt: &spg_sql::ast::DeleteStatement,
2989        cancel: CancelToken<'_>,
2990    ) -> Result<QueryResult, EngineError> {
2991        // v7.12.5 — snapshot BEFORE/AFTER DELETE row triggers + the
2992        // session FTS config before the mut borrow (same shape as
2993        // INSERT / UPDATE).
2994        let before_delete_triggers = self.snapshot_row_triggers(&stmt.table, "DELETE", "BEFORE");
2995        let after_delete_triggers = self.snapshot_row_triggers(&stmt.table, "DELETE", "AFTER");
2996        let trigger_session_cfg: Option<String> = self
2997            .session_params
2998            .get("default_text_search_config")
2999            .cloned();
3000        // v5.2.3: PK-targeted DELETE → first retire any cold-tier
3001        // locator for the key. The cold row body stays in the
3002        // segment (becoming shadowed garbage that a future
3003        // compaction pass reclaims) but the index no longer
3004        // resolves it. The shadow count contributes to the
3005        // affected total; the subsequent hot walk handles any hot
3006        // rows for the same key.
3007        let mut cold_shadow_count: usize = 0;
3008        if let Some(w) = &stmt.where_ {
3009            let schema_cols = self
3010                .active_catalog()
3011                .get(&stmt.table)
3012                .ok_or_else(|| {
3013                    EngineError::Storage(StorageError::TableNotFound {
3014                        name: stmt.table.clone(),
3015                    })
3016                })?
3017                .schema()
3018                .columns
3019                .clone();
3020            if let Some((col_pos, key)) = try_pk_predicate(w, &schema_cols, stmt.table.as_str())
3021                && let Some(idx_name) = self
3022                    .active_catalog()
3023                    .get(&stmt.table)
3024                    .and_then(|t| t.index_on(col_pos).map(|i| i.name.clone()))
3025            {
3026                cold_shadow_count = self
3027                    .active_catalog_mut()
3028                    .shadow_cold_row(&stmt.table, &idx_name, &key)
3029                    .unwrap_or(0);
3030            }
3031        }
3032
3033        // v7.12.1 — cache the session FTS config as an owned
3034        // String before the mutable table borrow below; the
3035        // ctx-builder then references it via `as_deref` so the
3036        // immutable read of `session_params` doesn't conflict
3037        // with the mut borrow chain.
3038        let ts_cfg: Option<String> = self
3039            .session_param("default_text_search_config")
3040            .map(String::from);
3041        let table = self
3042            .active_catalog_mut()
3043            .get_mut(&stmt.table)
3044            .ok_or_else(|| {
3045                EngineError::Storage(StorageError::TableNotFound {
3046                    name: stmt.table.clone(),
3047                })
3048            })?;
3049        let schema_cols: Vec<ColumnSchema> = table.schema().columns.clone();
3050        let ctx = EvalContext::new(&schema_cols, Some(stmt.table.as_str()))
3051            .with_default_text_search_config(ts_cfg.as_deref());
3052        let mut positions: Vec<usize> = Vec::new();
3053        // v7.6.3 — collect every to-delete row's full Value tuple
3054        // alongside its position, so the FK enforcement pass can
3055        // run after the mut borrow drops.
3056        let mut to_delete_rows: Vec<Vec<Value>> = Vec::new();
3057        for (i, row) in table.rows().iter().enumerate() {
3058            if i.is_multiple_of(256) {
3059                cancel.check()?;
3060            }
3061            let keep = if let Some(w) = &stmt.where_ {
3062                let cond = eval::eval_expr(w, row, &ctx)?;
3063                !matches!(cond, Value::Bool(true))
3064            } else {
3065                false
3066            };
3067            if !keep {
3068                positions.push(i);
3069                to_delete_rows.push(row.values.clone());
3070            }
3071        }
3072        // v7.6.3 / v7.6.4 — Stage 2: FK enforcement on the immutable
3073        // catalog. Release the mut borrow and run reverse-scan
3074        // against every child table whose FK targets this table.
3075        // RESTRICT / NoAction raise an error; CASCADE returns a
3076        // cascade plan that stage 3 applies after the primary delete.
3077        // SET NULL / SET DEFAULT remain Unsupported until v7.6.5.
3078        let _ = table;
3079        // v7.12.5 — BEFORE DELETE row-level triggers. Each fires
3080        // with NEW=None / OLD=pre-delete row; RETURN OLD (or NEW)
3081        // = proceed, RETURN NULL = skip the row entirely. The
3082        // filter must run BEFORE the FK cascade plan so cascaded
3083        // child rows track the trigger's skip-decision on the
3084        // parent.
3085        // v7.12.7 — embedded SQL queue.
3086        let mut deferred_embedded: Vec<triggers::DeferredEmbeddedStmt> = Vec::new();
3087        if !before_delete_triggers.is_empty() {
3088            let mut filtered_positions: Vec<usize> = Vec::with_capacity(positions.len());
3089            let mut filtered_old_rows: Vec<Vec<Value>> = Vec::with_capacity(to_delete_rows.len());
3090            for (pos, old_vals) in positions.iter().zip(to_delete_rows.iter()) {
3091                let old_row = Row::new(old_vals.clone());
3092                let mut cancel_this = false;
3093                for fd in &before_delete_triggers {
3094                    let (outcome, deferred) = triggers::fire_row_trigger(
3095                        fd,
3096                        None,
3097                        Some(&old_row),
3098                        &stmt.table,
3099                        &schema_cols,
3100                        &[],
3101                        trigger_session_cfg.as_deref(),
3102                        false,
3103                    )
3104                    .map_err(|e| {
3105                        EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}")))
3106                    })?;
3107                    deferred_embedded.extend(deferred);
3108                    if matches!(outcome, triggers::TriggerOutcome::Skip) {
3109                        cancel_this = true;
3110                        break;
3111                    }
3112                }
3113                if !cancel_this {
3114                    filtered_positions.push(*pos);
3115                    filtered_old_rows.push(old_vals.clone());
3116                }
3117            }
3118            positions = filtered_positions;
3119            to_delete_rows = filtered_old_rows;
3120        }
3121        let cascade_plan = plan_fk_parent_deletions(
3122            self.active_catalog(),
3123            &stmt.table,
3124            &positions,
3125            &to_delete_rows,
3126        )?;
3127        // Stage 3a — apply each FK child step (SET NULL / SET
3128        // DEFAULT / CASCADE delete) before deleting the parent.
3129        // The plan is already ordered: nulls/defaults first, then
3130        // cascade deletes (so a row mutated and later deleted
3131        // surfaces as deleted — though v7.6.5 doesn't produce
3132        // that overlap today).
3133        for step in &cascade_plan {
3134            apply_fk_child_step(self.active_catalog_mut(), step)?;
3135        }
3136        // Stage 3b — actually delete the original target rows.
3137        let table = self
3138            .active_catalog_mut()
3139            .get_mut(&stmt.table)
3140            .ok_or_else(|| {
3141                EngineError::Storage(StorageError::TableNotFound {
3142                    name: stmt.table.clone(),
3143                })
3144            })?;
3145        let affected = table.delete_rows(&positions) + cold_shadow_count;
3146        let _ = table;
3147        // v7.12.5 — AFTER DELETE row-level triggers fire post-write
3148        // with NEW=None / OLD=pre-delete row (each from the
3149        // already-snapshotted to_delete_rows). Return value is
3150        // ignored (matches PG AFTER semantics).
3151        if !after_delete_triggers.is_empty() {
3152            for old_vals in &to_delete_rows {
3153                let old_row = Row::new(old_vals.clone());
3154                for fd in &after_delete_triggers {
3155                    let (_outcome, deferred) = triggers::fire_row_trigger(
3156                        fd,
3157                        None,
3158                        Some(&old_row),
3159                        &stmt.table,
3160                        &schema_cols,
3161                        &[],
3162                        trigger_session_cfg.as_deref(),
3163                        true,
3164                    )
3165                    .map_err(|e| {
3166                        EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}")))
3167                    })?;
3168                    deferred_embedded.extend(deferred);
3169                }
3170            }
3171        }
3172        // v7.12.7 — drain trigger-emitted embedded SQL for this DELETE.
3173        self.execute_deferred_trigger_stmts(deferred_embedded, cancel)?;
3174        // v6.2.1 — auto-analyze modified-row tracking for DELETE.
3175        if !self.in_transaction() && affected > 0 {
3176            self.statistics
3177                .record_modifications(&stmt.table, affected as u64);
3178        }
3179        // v7.9.4 — RETURNING projection over the soon-to-be-gone
3180        // rows. `to_delete_rows` was snapshotted in stage 1 before
3181        // mutation, so the projection sees the pre-delete state
3182        // (matches PG semantics: DELETE RETURNING returns the row
3183        // as it was just before removal).
3184        if let Some(items) = &stmt.returning {
3185            return self.build_returning_rows(&stmt.table, items, to_delete_rows);
3186        }
3187        Ok(QueryResult::CommandOk {
3188            affected,
3189            modified_catalog: !self.in_transaction(),
3190        })
3191    }
3192
3193    /// `SHOW TABLES` — one row per table in the active catalog.
3194    /// Column name is `name` so result-set consumers can downstream
3195    /// `SELECT name FROM ...` style logic if needed.
3196    /// v4.26: `EXPLAIN [ANALYZE] <select>`. Returns a single-column
3197    /// `QUERY PLAN` text table — first line names the top operator
3198    /// (Scan / Aggregate / Window / etc.), indented children list
3199    /// FROM joins, WHERE filters, ORDER BY / LIMIT, projection
3200    /// shape, and any active index hits. `ANALYZE` execs the inner
3201    /// SELECT and appends actual-row + elapsed-micros annotations.
3202    #[allow(clippy::format_push_string)]
3203    fn exec_explain(
3204        &self,
3205        e: &spg_sql::ast::ExplainStatement,
3206        cancel: CancelToken<'_>,
3207    ) -> Result<QueryResult, EngineError> {
3208        let mut lines = Vec::<String>::new();
3209        explain_select(&e.inner, self, 0, &mut lines);
3210        if e.suggest {
3211            // v6.8.3 — index advisor. Walks the SELECT's FROM
3212            // tables + WHERE column refs; for each (table, column)
3213            // pair that lacks an index, append a SUGGEST line with
3214            // a copy-pastable `CREATE INDEX` statement. This is a
3215            // pure-syntax heuristic — no cardinality estimation —
3216            // matching the v6.8.3 design intent of "tell the
3217            // operator where indexes are missing", not "give the
3218            // mathematically optimal index set".
3219            let suggestions = build_index_suggestions(&e.inner, self);
3220            for s in suggestions {
3221                lines.push(s);
3222            }
3223        } else if e.analyze {
3224            // v6.2.4 — EXPLAIN ANALYZE annotates each operator line
3225            // with `(rows=N)` where the row count is computable
3226            // without re-executing the full query:
3227            //   - Top-level operator (first non-indented line):
3228            //     rows = final result.len()
3229            //   - "From: <table> [full scan]" lines: rows =
3230            //     table.rows().len() (catalog read; no execution)
3231            //   - "From: <table> [index seek]": indeterminate —
3232            //     the index step would need re-execution; v6.2.5
3233            //     adds per-operator wall-clock + hot/cold rows
3234            //     instrumentation that makes this concrete.
3235            //   - Everything else: marked `(—)` so the surface
3236            //     stays well-defined without silently dropping
3237            //     stats. v6.2.5 fills in via inline executor
3238            //     instrumentation.
3239            // Total elapsed lands on a trailing `Total: …` line.
3240            let started = self.clock.map(|f| f());
3241            let exec = self.exec_select_cancel(&e.inner, cancel)?;
3242            let elapsed_micros = match (self.clock, started) {
3243                (Some(f), Some(s)) => Some(f().saturating_sub(s)),
3244                _ => None,
3245            };
3246            let row_count = if let QueryResult::Rows { rows, .. } = &exec {
3247                rows.len()
3248            } else {
3249                0
3250            };
3251            annotate_explain_lines(&mut lines, row_count, self);
3252            let mut total = alloc::format!("Total: rows={row_count}");
3253            if let Some(us) = elapsed_micros {
3254                total.push_str(&alloc::format!(" elapsed={us}us"));
3255            }
3256            lines.push(total);
3257        }
3258        let columns = alloc::vec![ColumnSchema::new("QUERY PLAN", DataType::Text, false)];
3259        let rows: Vec<Row> = lines
3260            .into_iter()
3261            .map(|l| Row::new(alloc::vec![Value::Text(l)]))
3262            .collect();
3263        Ok(QueryResult::Rows { columns, rows })
3264    }
3265
3266    fn exec_show_tables(&self) -> QueryResult {
3267        let columns = alloc::vec![ColumnSchema::new("name", DataType::Text, false)];
3268        let rows: Vec<Row> = self
3269            .active_catalog()
3270            .table_names()
3271            .into_iter()
3272            .map(|n| Row::new(alloc::vec![Value::Text(n)]))
3273            .collect();
3274        QueryResult::Rows { columns, rows }
3275    }
3276
3277    /// `SHOW COLUMNS FROM <table>` — one row per column with the
3278    /// declared name, SQL type rendering, and nullability flag.
3279    fn exec_show_columns(&self, table_name: &str) -> Result<QueryResult, EngineError> {
3280        let table =
3281            self.active_catalog()
3282                .get(table_name)
3283                .ok_or_else(|| StorageError::TableNotFound {
3284                    name: table_name.into(),
3285                })?;
3286        let columns = alloc::vec![
3287            ColumnSchema::new("name", DataType::Text, false),
3288            ColumnSchema::new("type", DataType::Text, false),
3289            ColumnSchema::new("nullable", DataType::Bool, false),
3290        ];
3291        let rows: Vec<Row> = table
3292            .schema()
3293            .columns
3294            .iter()
3295            .map(|c| {
3296                Row::new(alloc::vec![
3297                    Value::Text(c.name.clone()),
3298                    Value::Text(alloc::format!("{}", c.ty)),
3299                    Value::Bool(c.nullable),
3300                ])
3301            })
3302            .collect();
3303        Ok(QueryResult::Rows { columns, rows })
3304    }
3305
3306    fn exec_begin(&mut self) -> Result<QueryResult, EngineError> {
3307        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
3308        if self.tx_catalogs.contains_key(&tx_id) {
3309            return Err(EngineError::TransactionAlreadyOpen);
3310        }
3311        self.tx_catalogs.insert(
3312            tx_id,
3313            TxState {
3314                catalog: self.catalog.clone(),
3315                savepoints: Vec::new(),
3316            },
3317        );
3318        Ok(QueryResult::CommandOk {
3319            affected: 0,
3320            modified_catalog: false,
3321        })
3322    }
3323
3324    fn exec_commit(&mut self) -> Result<QueryResult, EngineError> {
3325        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
3326        let state = self
3327            .tx_catalogs
3328            .remove(&tx_id)
3329            .ok_or(EngineError::NoActiveTransaction)?;
3330        self.catalog = state.catalog;
3331        // All savepoints become permanent at COMMIT and the stack
3332        // resets for the next TX (`state.savepoints` is discarded with
3333        // `state`).
3334        Ok(QueryResult::CommandOk {
3335            affected: 0,
3336            modified_catalog: true,
3337        })
3338    }
3339
3340    fn exec_rollback(&mut self) -> Result<QueryResult, EngineError> {
3341        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
3342        if self.tx_catalogs.remove(&tx_id).is_none() {
3343            return Err(EngineError::NoActiveTransaction);
3344        }
3345        // savepoints discarded with the TxState
3346        Ok(QueryResult::CommandOk {
3347            affected: 0,
3348            modified_catalog: false,
3349        })
3350    }
3351
3352    fn exec_savepoint(&mut self, name: String) -> Result<QueryResult, EngineError> {
3353        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
3354        let state = self
3355            .tx_catalogs
3356            .get_mut(&tx_id)
3357            .ok_or(EngineError::NoActiveTransaction)?;
3358        // PG re-uses an existing savepoint name by dropping the older
3359        // entry and pushing a fresh one — match that behaviour so
3360        // application code can `SAVEPOINT sp; ...; SAVEPOINT sp` freely.
3361        state.savepoints.retain(|(n, _)| n != &name);
3362        let snapshot = state.catalog.clone();
3363        state.savepoints.push((name, snapshot));
3364        Ok(QueryResult::CommandOk {
3365            affected: 0,
3366            modified_catalog: false,
3367        })
3368    }
3369
3370    fn exec_rollback_to_savepoint(&mut self, name: &str) -> Result<QueryResult, EngineError> {
3371        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
3372        let state = self
3373            .tx_catalogs
3374            .get_mut(&tx_id)
3375            .ok_or(EngineError::NoActiveTransaction)?;
3376        let pos = state
3377            .savepoints
3378            .iter()
3379            .rposition(|(n, _)| n == name)
3380            .ok_or_else(|| {
3381                EngineError::Unsupported(alloc::format!("savepoint not found: {name}"))
3382            })?;
3383        // The savepoint stays on the stack (PG semantics): a later
3384        // `RELEASE` or further `ROLLBACK TO` is still allowed. Everything
3385        // after it is discarded.
3386        let snapshot = state.savepoints[pos].1.clone();
3387        state.savepoints.truncate(pos + 1);
3388        state.catalog = snapshot;
3389        Ok(QueryResult::CommandOk {
3390            affected: 0,
3391            modified_catalog: false,
3392        })
3393    }
3394
3395    fn exec_release_savepoint(&mut self, name: &str) -> Result<QueryResult, EngineError> {
3396        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
3397        let state = self
3398            .tx_catalogs
3399            .get_mut(&tx_id)
3400            .ok_or(EngineError::NoActiveTransaction)?;
3401        let pos = state
3402            .savepoints
3403            .iter()
3404            .rposition(|(n, _)| n == name)
3405            .ok_or_else(|| {
3406                EngineError::Unsupported(alloc::format!("savepoint not found: {name}"))
3407            })?;
3408        // RELEASE keeps the work since the savepoint, just discards the
3409        // bookmark plus everything nested under it.
3410        state.savepoints.truncate(pos);
3411        Ok(QueryResult::CommandOk {
3412            affected: 0,
3413            modified_catalog: false,
3414        })
3415    }
3416
3417    /// v6.0.4 — synchronous `ALTER INDEX <name> REBUILD [WITH
3418    /// (encoding = …)]`. Walks every table in the active catalog
3419    /// looking for an index matching `stmt.name`, then delegates the
3420    /// rebuild (including any encoding switch) to
3421    /// `Table::rebuild_nsw_index`. The "live" non-blocking
3422    /// optimisation is v6.0.4.1 / v6.1.x territory.
3423    /// v6.7.2 — `ALTER TABLE t SET hot_tier_bytes = X`. Dispatch
3424    /// arm. Currently the only setting is `hot_tier_bytes`; later
3425    /// v6.7.x can extend `AlterTableTarget` without touching this
3426    /// arm structure.
3427    fn exec_alter_table(
3428        &mut self,
3429        s: spg_sql::ast::AlterTableStatement,
3430    ) -> Result<QueryResult, EngineError> {
3431        // v7.13.2 — mailrs round-6 S1: apply each subaction in order.
3432        // On first error the statement aborts; subactions already
3433        // applied stay (no transactional rollback in v7.13 — wrap in
3434        // BEGIN/COMMIT if atomicity matters).
3435        let table_name = s.name.clone();
3436        for target in s.targets {
3437            self.exec_alter_table_subaction(&table_name, target)?;
3438        }
3439        Ok(QueryResult::CommandOk {
3440            affected: 0,
3441            modified_catalog: !self.in_transaction(),
3442        })
3443    }
3444
3445    fn exec_alter_table_subaction(
3446        &mut self,
3447        table_name_outer: &str,
3448        target: spg_sql::ast::AlterTableTarget,
3449    ) -> Result<(), EngineError> {
3450        // Inner helper retains the s.name closure shape; alias to `s`
3451        // for minimal diff against the v7.13.0 body.
3452        struct S<'a> {
3453            name: &'a str,
3454        }
3455        let s = S {
3456            name: table_name_outer,
3457        };
3458        match target {
3459            spg_sql::ast::AlterTableTarget::SetHotTierBytes(n) => {
3460                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
3461                    EngineError::Storage(StorageError::TableNotFound {
3462                        name: s.name.into(),
3463                    })
3464                })?;
3465                table.schema_mut().hot_tier_bytes = Some(n);
3466            }
3467            spg_sql::ast::AlterTableTarget::AddForeignKey(fk) => {
3468                // v7.6.8 — resolve FK against the live catalog first
3469                // (validates parent table, columns, indices). Then
3470                // verify every existing row in the child table
3471                // satisfies the new constraint. Then install it.
3472                let cols_snapshot = self
3473                    .active_catalog()
3474                    .get(s.name)
3475                    .ok_or_else(|| {
3476                        EngineError::Storage(StorageError::TableNotFound {
3477                            name: s.name.into(),
3478                        })
3479                    })?
3480                    .schema()
3481                    .columns
3482                    .clone();
3483                let storage_fk =
3484                    resolve_foreign_key(s.name, &cols_snapshot, fk, self.active_catalog())?;
3485                // Verify existing rows. Treat them as a virtual
3486                // INSERT batch — reusing the v7.6.2 enforce helper.
3487                let existing_rows: Vec<Vec<Value>> = self
3488                    .active_catalog()
3489                    .get(&s.name)
3490                    .expect("checked above")
3491                    .rows()
3492                    .iter()
3493                    .map(|r| r.values.clone())
3494                    .collect();
3495                enforce_fk_inserts(
3496                    self.active_catalog(),
3497                    s.name,
3498                    core::slice::from_ref(&storage_fk),
3499                    &existing_rows,
3500                )?;
3501                // Reject duplicate constraint name.
3502                let table = self
3503                    .active_catalog_mut()
3504                    .get_mut(s.name)
3505                    .expect("checked above");
3506                if let Some(name) = &storage_fk.name
3507                    && table
3508                        .schema()
3509                        .foreign_keys
3510                        .iter()
3511                        .any(|f| f.name.as_ref() == Some(name))
3512                {
3513                    return Err(EngineError::Unsupported(alloc::format!(
3514                        "ALTER TABLE ADD CONSTRAINT: a constraint named {name:?} already exists"
3515                    )));
3516                }
3517                table.schema_mut().foreign_keys.push(storage_fk);
3518            }
3519            spg_sql::ast::AlterTableTarget::DropForeignKey { name, if_exists } => {
3520                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
3521                    EngineError::Storage(StorageError::TableNotFound {
3522                        name: s.name.into(),
3523                    })
3524                })?;
3525                let fks = &mut table.schema_mut().foreign_keys;
3526                let before = fks.len();
3527                fks.retain(|f| f.name.as_ref() != Some(&name));
3528                if fks.len() == before && !if_exists {
3529                    return Err(EngineError::Unsupported(alloc::format!(
3530                        "ALTER TABLE DROP CONSTRAINT: no FK named {name:?} on {:?}",
3531                        s.name
3532                    )));
3533                }
3534                // v7.13.2 mailrs round-6 S7: IF EXISTS silences the miss.
3535            }
3536            spg_sql::ast::AlterTableTarget::AddColumn {
3537                column,
3538                if_not_exists,
3539            } => {
3540                // v7.13.0 — mailrs round-5 G1. Append-only column add
3541                // with back-fill of the DEFAULT (or NULL) into every
3542                // existing row. Column positions don't shift, so we
3543                // skip index rebuild.
3544                let clock = self.clock;
3545                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
3546                    EngineError::Storage(StorageError::TableNotFound {
3547                        name: s.name.into(),
3548                    })
3549                })?;
3550                if table
3551                    .schema()
3552                    .columns
3553                    .iter()
3554                    .any(|c| c.name.eq_ignore_ascii_case(&column.name))
3555                {
3556                    if if_not_exists {
3557                        return Ok(());
3558                    }
3559                    return Err(EngineError::Unsupported(alloc::format!(
3560                        "ALTER TABLE ADD COLUMN: column {:?} already exists on {:?}",
3561                        column.name,
3562                        s.name
3563                    )));
3564                }
3565                let col_name = column.name.clone();
3566                let nullable = column.nullable;
3567                let has_default =
3568                    column.default.is_some() || column.auto_increment;
3569                let col_schema = column_def_to_schema(column)?;
3570                let row_count = table.row_count();
3571                // Compute the back-fill value. Literal / runtime DEFAULT
3572                // funnels through the same resolver that INSERT uses
3573                // (v7.9.21 `resolve_column_default_free`). NULL when
3574                // the column is nullable and has no DEFAULT. NOT NULL
3575                // without DEFAULT errors when the table has existing
3576                // rows — same as PG.
3577                let fill_value: Value = if has_default
3578                    || col_schema.runtime_default.is_some()
3579                {
3580                    resolve_column_default_free(&col_schema, clock)?
3581                } else if nullable || row_count == 0 {
3582                    Value::Null
3583                } else {
3584                    return Err(EngineError::Unsupported(alloc::format!(
3585                        "ALTER TABLE ADD COLUMN {col_name:?}: NOT NULL column requires DEFAULT \
3586                         when the table has existing rows"
3587                    )));
3588                };
3589                table.add_column(col_schema, fill_value);
3590            }
3591            spg_sql::ast::AlterTableTarget::AlterColumnType {
3592                column,
3593                new_type,
3594                using,
3595            } => {
3596                // v7.13.0 — mailrs round-5 G8. Re-evaluate each
3597                // row's column value (either through the USING
3598                // expression if supplied, or as a direct CAST of
3599                // the existing value) and re-coerce to the new
3600                // type. Indices on the column get rebuilt.
3601                let new_data_type = column_type_to_data_type(new_type);
3602                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
3603                    EngineError::Storage(StorageError::TableNotFound {
3604                        name: s.name.into(),
3605                    })
3606                })?;
3607                let col_pos = table
3608                    .schema()
3609                    .columns
3610                    .iter()
3611                    .position(|c| c.name.eq_ignore_ascii_case(&column))
3612                    .ok_or_else(|| {
3613                        EngineError::Unsupported(alloc::format!(
3614                            "ALTER COLUMN TYPE: column {column:?} not found on {:?}",
3615                            s.name
3616                        ))
3617                    })?;
3618                let schema_cols = table.schema().columns.clone();
3619                let ctx = eval::EvalContext::new(&schema_cols, None);
3620                let mut new_values: alloc::vec::Vec<Value> =
3621                    alloc::vec::Vec::with_capacity(table.row_count());
3622                for row in table.rows().iter() {
3623                    let raw = match &using {
3624                        Some(expr) => eval::eval_expr(expr, row, &ctx).map_err(|e| {
3625                            EngineError::Unsupported(alloc::format!(
3626                                "ALTER COLUMN TYPE: USING expression failed: {e:?}"
3627                            ))
3628                        })?,
3629                        None => row.values.get(col_pos).cloned().unwrap_or(Value::Null),
3630                    };
3631                    let coerced = coerce_value(raw, new_data_type, &column, col_pos)?;
3632                    new_values.push(coerced);
3633                }
3634                table.schema_mut().columns[col_pos].ty = new_data_type;
3635                for (i, v) in new_values.into_iter().enumerate() {
3636                    let mut row_values = table
3637                        .rows()
3638                        .get(i)
3639                        .expect("bounds-checked above")
3640                        .values
3641                        .clone();
3642                    row_values[col_pos] = v;
3643                    table.update_row(i, row_values)?;
3644                }
3645            }
3646            spg_sql::ast::AlterTableTarget::AddTableConstraint(tc) => {
3647                // v7.14.0 — pg_dump emits PKs as a separate
3648                // ALTER TABLE ADD CONSTRAINT post-CREATE-TABLE.
3649                // For PRIMARY KEY / UNIQUE, install a UC entry
3650                // and the implicit BTree index on the leading
3651                // column. CHECK: append predicate to schema.
3652                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
3653                    EngineError::Storage(StorageError::TableNotFound {
3654                        name: s.name.into(),
3655                    })
3656                })?;
3657                let is_pk = matches!(
3658                    tc,
3659                    spg_sql::ast::TableConstraint::PrimaryKey { .. }
3660                );
3661                match tc {
3662                    spg_sql::ast::TableConstraint::PrimaryKey { columns, .. }
3663                    | spg_sql::ast::TableConstraint::Unique { columns, .. } => {
3664                        let positions: Vec<usize> = columns
3665                            .iter()
3666                            .map(|c| {
3667                                table
3668                                    .schema()
3669                                    .columns
3670                                    .iter()
3671                                    .position(|sc| sc.name.eq_ignore_ascii_case(c))
3672                                    .ok_or_else(|| {
3673                                        EngineError::Unsupported(alloc::format!(
3674                                            "ALTER TABLE ADD CONSTRAINT: column {c:?} not found on {:?}",
3675                                            s.name
3676                                        ))
3677                                    })
3678                            })
3679                            .collect::<Result<Vec<_>, _>>()?;
3680                        // Skip if an equivalent UC is already there
3681                        // (idempotent — pg_dump's PK + a prior inline
3682                        // PK shouldn't double-install).
3683                        let already = table
3684                            .schema()
3685                            .uniqueness_constraints
3686                            .iter()
3687                            .any(|u| u.columns == positions);
3688                        if !already {
3689                            table.schema_mut().uniqueness_constraints.push(
3690                                spg_storage::UniquenessConstraint {
3691                                    is_primary_key: is_pk,
3692                                    columns: positions.clone(),
3693                                    nulls_not_distinct: false,
3694                                },
3695                            );
3696                            // PK implies NOT NULL on referenced cols.
3697                            if is_pk {
3698                                for p in &positions {
3699                                    if let Some(c) = table.schema_mut().columns.get_mut(*p) {
3700                                        c.nullable = false;
3701                                    }
3702                                }
3703                            }
3704                            // Add a BTree index on the leading
3705                            // column for INSERT-side enforcement.
3706                            let leading = &columns[0];
3707                            let already_idx = table.indices().iter().any(|idx| {
3708                                matches!(idx.kind, spg_storage::IndexKind::BTree(_))
3709                                    && table.schema().columns[idx.column_position].name
3710                                        == *leading
3711                            });
3712                            if !already_idx {
3713                                let suffix = if is_pk { "pkey" } else { "key" };
3714                                let idx_name = alloc::format!("{}_{leading}_{suffix}", s.name);
3715                                let _ = table.add_index(idx_name, leading);
3716                            }
3717                        }
3718                    }
3719                    spg_sql::ast::TableConstraint::Check { expr, .. } => {
3720                        table.schema_mut().checks.push(alloc::format!("{expr}"));
3721                    }
3722                    spg_sql::ast::TableConstraint::Index { name, columns } => {
3723                        // v7.15.0 — ALTER TABLE ADD KEY (cols).
3724                        // mysqldump occasionally emits this
3725                        // post-CREATE-TABLE shape; build a BTree
3726                        // on the leading column using the
3727                        // user-supplied or synthesised name.
3728                        let leading = &columns[0];
3729                        let already_idx = table.indices().iter().any(|idx| {
3730                            matches!(idx.kind, spg_storage::IndexKind::BTree(_))
3731                                && table.schema().columns[idx.column_position].name == *leading
3732                        });
3733                        if !already_idx {
3734                            let idx_name = name
3735                                .clone()
3736                                .unwrap_or_else(|| alloc::format!("{}_{leading}_idx", s.name));
3737                            let _ = table.add_index(idx_name, leading);
3738                        }
3739                    }
3740                }
3741            }
3742            spg_sql::ast::AlterTableTarget::DropColumn {
3743                column,
3744                if_exists,
3745                cascade,
3746            } => {
3747                // v7.13.3 — mailrs round-7 S8. Remove the column +
3748                // every row's value at that position; drop any index
3749                // on the column. RESTRICT (default) rejects when an
3750                // FK on this table or partial-index predicate
3751                // references the column; CASCADE removes those
3752                // dependents first.
3753                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
3754                    EngineError::Storage(StorageError::TableNotFound {
3755                        name: s.name.into(),
3756                    })
3757                })?;
3758                let col_pos = match table
3759                    .schema()
3760                    .columns
3761                    .iter()
3762                    .position(|c| c.name.eq_ignore_ascii_case(&column))
3763                {
3764                    Some(p) => p,
3765                    None => {
3766                        if if_exists {
3767                            return Ok(());
3768                        }
3769                        return Err(EngineError::Unsupported(alloc::format!(
3770                            "ALTER TABLE DROP COLUMN: column {column:?} not found on {:?}",
3771                            s.name
3772                        )));
3773                    }
3774                };
3775                // Dependent check: FKs whose local columns include
3776                // col_pos. CASCADE drops them; otherwise reject.
3777                let dependent_fks: Vec<usize> = table
3778                    .schema()
3779                    .foreign_keys
3780                    .iter()
3781                    .enumerate()
3782                    .filter_map(|(i, fk)| {
3783                        if fk.local_columns.contains(&col_pos) {
3784                            Some(i)
3785                        } else {
3786                            None
3787                        }
3788                    })
3789                    .collect();
3790                if !dependent_fks.is_empty() && !cascade {
3791                    return Err(EngineError::Unsupported(alloc::format!(
3792                        "ALTER TABLE DROP COLUMN {column:?}: column has FK dependents; \
3793                         use DROP COLUMN ... CASCADE to remove them"
3794                    )));
3795                }
3796                // CASCADE the FK removals first.
3797                if cascade {
3798                    // Drop in reverse so indices stay valid.
3799                    let mut sorted = dependent_fks.clone();
3800                    sorted.sort();
3801                    sorted.reverse();
3802                    let fks = &mut table.schema_mut().foreign_keys;
3803                    for i in sorted {
3804                        fks.remove(i);
3805                    }
3806                }
3807                // Drop the column. New helper on Table does the
3808                // row + schema + index shift atomically.
3809                table.drop_column(col_pos);
3810            }
3811            spg_sql::ast::AlterTableTarget::SetTriggerEnabled { which, enabled } => {
3812                // v7.16.1 — mailrs round-9 A.2.b. pg_dump
3813                // --disable-triggers wraps each table's data
3814                // block with `ALTER TABLE … DISABLE TRIGGER ALL`
3815                // / `… ENABLE TRIGGER ALL`. Toggle the enabled
3816                // flag on every matching trigger so the row-
3817                // write paths skip them; the catalog snapshot
3818                // persists the new state across restarts.
3819                let table_name = s.name.to_string();
3820                let trigs = self.active_catalog_mut().triggers_mut();
3821                let mut touched = false;
3822                for t in trigs.iter_mut() {
3823                    if !t.table.eq_ignore_ascii_case(&table_name) {
3824                        continue;
3825                    }
3826                    match &which {
3827                        spg_sql::ast::TriggerSelector::All => {
3828                            t.enabled = enabled;
3829                            touched = true;
3830                        }
3831                        spg_sql::ast::TriggerSelector::Named(name) => {
3832                            if t.name.eq_ignore_ascii_case(name) {
3833                                t.enabled = enabled;
3834                                touched = true;
3835                            }
3836                        }
3837                    }
3838                }
3839                // PG semantics: `ALL` on a table with no
3840                // triggers is a no-op (no error). A `Named`
3841                // form pointing at a non-existent trigger
3842                // raises in PG; v7.16.1 also raises so we
3843                // don't silently lose state.
3844                if !touched {
3845                    if let spg_sql::ast::TriggerSelector::Named(name) = &which {
3846                        return Err(EngineError::Unsupported(alloc::format!(
3847                            "ALTER TABLE {table_name:?} {} TRIGGER {name:?}: no such trigger on table",
3848                            if enabled { "ENABLE" } else { "DISABLE" },
3849                        )));
3850                    }
3851                }
3852            }
3853            spg_sql::ast::AlterTableTarget::RenameColumn { old, new } => {
3854                // v7.15.0 — `ALTER TABLE t RENAME [COLUMN] old TO
3855                // new`. Rename the column in the schema; rewrite
3856                // every stored source string on this table that
3857                // references it as a (potentially-qualified)
3858                // column identifier: CHECK predicates, partial-
3859                // index predicates, runtime DEFAULT expressions.
3860                // Then walk catalog triggers on this table and
3861                // patch any `UPDATE OF` column list. Function and
3862                // trigger bodies are NOT auto-rewritten — that
3863                // surface is dynamic SQL territory; users update
3864                // those separately (matches PG plpgsql behavior:
3865                // a column rename invalidates name-referencing
3866                // plpgsql at call time, not rename time).
3867                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
3868                    EngineError::Storage(StorageError::TableNotFound {
3869                        name: s.name.into(),
3870                    })
3871                })?;
3872                let col_pos = table
3873                    .schema()
3874                    .columns
3875                    .iter()
3876                    .position(|c| c.name.eq_ignore_ascii_case(&old))
3877                    .ok_or_else(|| {
3878                        EngineError::Unsupported(alloc::format!(
3879                            "ALTER TABLE RENAME COLUMN: column {old:?} not found on {:?}",
3880                            s.name
3881                        ))
3882                    })?;
3883                // Reject same-name (case-insensitive) collision.
3884                if table
3885                    .schema()
3886                    .columns
3887                    .iter()
3888                    .enumerate()
3889                    .any(|(i, c)| i != col_pos && c.name.eq_ignore_ascii_case(&new))
3890                {
3891                    return Err(EngineError::Unsupported(alloc::format!(
3892                        "ALTER TABLE RENAME COLUMN: column {new:?} already exists on {:?}",
3893                        s.name
3894                    )));
3895                }
3896                // Schema rename first — even idempotent same-name
3897                // rename (`ALTER TABLE t RENAME a TO a`) needs to
3898                // be a no-op, not an error.
3899                if old.eq_ignore_ascii_case(&new) {
3900                    return Ok(());
3901                }
3902                table.rename_column(col_pos, &new);
3903                // Rewrite per-column runtime_default sources on
3904                // every column of this table — a DEFAULT expression
3905                // on column X may reference column Y by name (rare,
3906                // but legal in PG when the value is supplied via a
3907                // function that takes the row).
3908                let n_cols = table.schema().columns.len();
3909                for i in 0..n_cols {
3910                    let rt = table.schema().columns[i].runtime_default.clone();
3911                    if let Some(src) = rt {
3912                        let rewritten = rewrite_column_in_source(&src, &old, &new)?;
3913                        table.schema_mut().columns[i].runtime_default = Some(rewritten);
3914                    }
3915                }
3916                // Rewrite table-level CHECK predicates.
3917                let checks = table.schema().checks.clone();
3918                let mut new_checks = Vec::with_capacity(checks.len());
3919                for chk in checks {
3920                    new_checks.push(rewrite_column_in_source(&chk, &old, &new)?);
3921                }
3922                table.schema_mut().checks = new_checks;
3923                // Rewrite per-index partial_predicate sources.
3924                let n_idx = table.indices().len();
3925                for i in 0..n_idx {
3926                    let pred = table.indices()[i].partial_predicate.clone();
3927                    if let Some(src) = pred {
3928                        let rewritten = rewrite_column_in_source(&src, &old, &new)?;
3929                        // SAFETY: indices_mut would be cleanest, but
3930                        // partial_predicate is the only mutable field
3931                        // here; reach in via the public mut accessor.
3932                        table.set_partial_predicate(i, Some(rewritten));
3933                    }
3934                }
3935                // Walk catalog triggers; patch `update_columns` on
3936                // triggers attached to this table.
3937                let table_name = s.name.to_string();
3938                for trig in self.active_catalog_mut().triggers_mut() {
3939                    if !trig.table.eq_ignore_ascii_case(&table_name) {
3940                        continue;
3941                    }
3942                    for c in &mut trig.update_columns {
3943                        if c.eq_ignore_ascii_case(&old) {
3944                            *c = new.clone();
3945                        }
3946                    }
3947                }
3948            }
3949        }
3950        Ok(())
3951    }
3952
3953    fn exec_alter_index(
3954        &mut self,
3955        stmt: spg_sql::ast::AlterIndexStatement,
3956    ) -> Result<QueryResult, EngineError> {
3957        // Translate the optional SQL-side encoding choice into the
3958        // storage-side enum; the same SqlVecEncoding -> VecEncoding
3959        // bridge `column_type_to_data_type` uses.
3960        let spg_sql::ast::AlterIndexStatement {
3961            name: idx_name,
3962            target,
3963        } = stmt;
3964        let spg_sql::ast::AlterIndexTarget::Rebuild { encoding } = target;
3965        let target = encoding.map(|e| match e {
3966            SqlVecEncoding::F32 => VecEncoding::F32,
3967            SqlVecEncoding::Sq8 => VecEncoding::Sq8,
3968            SqlVecEncoding::F16 => VecEncoding::F16,
3969        });
3970        // Linear scan: index names are globally unique within a
3971        // catalog (enforced by add_nsw_index_inner) so the first
3972        // match is the only one. Save the table name to avoid
3973        // borrowing while we then take a mut borrow.
3974        let table_name = {
3975            let cat = self.active_catalog();
3976            let mut found: Option<String> = None;
3977            for tname in cat.table_names() {
3978                if let Some(t) = cat.get(&tname)
3979                    && t.indices().iter().any(|i| i.name == idx_name)
3980                {
3981                    found = Some(tname);
3982                    break;
3983                }
3984            }
3985            found.ok_or_else(|| {
3986                EngineError::Storage(StorageError::IndexNotFound {
3987                    name: idx_name.clone(),
3988                })
3989            })?
3990        };
3991        let table = self
3992            .active_catalog_mut()
3993            .get_mut(&table_name)
3994            .expect("table found above");
3995        table.rebuild_nsw_index(&idx_name, target)?;
3996        // v6.3.1 — ALTER INDEX REBUILD potentially with new encoding
3997        // changes cost characteristics; evict any cached plans.
3998        self.plan_cache.evict_referencing(&table_name);
3999        Ok(QueryResult::CommandOk {
4000            affected: 0,
4001            modified_catalog: !self.in_transaction(),
4002        })
4003    }
4004
4005    fn exec_create_index(
4006        &mut self,
4007        stmt: CreateIndexStatement,
4008    ) -> Result<QueryResult, EngineError> {
4009        let table = self
4010            .active_catalog_mut()
4011            .get_mut(&stmt.table)
4012            .ok_or_else(|| {
4013                EngineError::Storage(StorageError::TableNotFound {
4014                    name: stmt.table.clone(),
4015                })
4016            })?;
4017        // `IF NOT EXISTS` reduces DuplicateIndex to a no-op CommandOk.
4018        if stmt.if_not_exists && table.indices().iter().any(|i| i.name == stmt.name) {
4019            return Ok(QueryResult::CommandOk {
4020                affected: 0,
4021                modified_catalog: false,
4022            });
4023        }
4024        // v7.9.14 — multi-column index parses through; engine
4025        // builds a single-column BTree on the leading column only.
4026        // The extras live on the AST so spg-server's dispatcher
4027        // can emit a PG-wire NoticeResponse / log line. Composite
4028        // BTree keys land in v7.10.
4029        let _ = &stmt.extra_columns; // intentional drop on engine side
4030        let table_name = stmt.table.clone();
4031        // v6.8.0 — resolve INCLUDE column names to positions. Done
4032        // before `add_index` so a typo error surfaces before any
4033        // catalog mutation lands.
4034        let included_positions: Vec<usize> = if stmt.included_columns.is_empty() {
4035            Vec::new()
4036        } else {
4037            let schema = table.schema();
4038            stmt.included_columns
4039                .iter()
4040                .map(|c| {
4041                    schema.column_position(c).ok_or_else(|| {
4042                        EngineError::Storage(StorageError::ColumnNotFound { column: c.clone() })
4043                    })
4044                })
4045                .collect::<Result<Vec<_>, _>>()?
4046        };
4047        match stmt.method {
4048            IndexMethod::BTree => table.add_index(stmt.name.clone(), &stmt.column)?,
4049            IndexMethod::Hnsw => {
4050                if !included_positions.is_empty() {
4051                    return Err(EngineError::Unsupported(
4052                        "INCLUDE columns are not supported on HNSW indexes".into(),
4053                    ));
4054                }
4055                table.add_nsw_index(stmt.name.clone(), &stmt.column, spg_storage::NSW_DEFAULT_M)?;
4056            }
4057            // v6.7.1 — BRIN. Pure metadata; no in-memory data.
4058            IndexMethod::Brin => {
4059                if !included_positions.is_empty() {
4060                    return Err(EngineError::Unsupported(
4061                        "INCLUDE columns are not supported on BRIN indexes".into(),
4062                    ));
4063                }
4064                table.add_brin_index(stmt.name.clone(), &stmt.column)?;
4065            }
4066            // v7.12.3 — GIN inverted index. Real posting-list-backed
4067            // GIN when the indexed column is `tsvector`; falls back
4068            // to a BTree on the leading column for any other column
4069            // type so v7.9.26b's `pg_dump` compatibility (GIN on
4070            // JSONB etc. silently loading as BTree) is preserved.
4071            // Operators see the real GIN only where it matters; old
4072            // schemas keep loading.
4073            IndexMethod::Gin => {
4074                if !included_positions.is_empty() {
4075                    return Err(EngineError::Unsupported(
4076                        "INCLUDE columns are not supported on GIN indexes".into(),
4077                    ));
4078                }
4079                let col_pos = table
4080                    .schema()
4081                    .column_position(&stmt.column)
4082                    .ok_or_else(|| {
4083                        EngineError::Storage(StorageError::ColumnNotFound {
4084                            column: stmt.column.clone(),
4085                        })
4086                    })?;
4087                let col_ty = table.schema().columns[col_pos].ty;
4088                // v7.15.0 — `gin_trgm_ops` on a TEXT/VARCHAR
4089                // column dispatches to the real trigram-shingle
4090                // GIN build (LIKE / similarity acceleration).
4091                // Other GIN opclasses fall through to the regular
4092                // tsvector-vs-BTree split below.
4093                let is_trgm = stmt
4094                    .opclass
4095                    .as_deref()
4096                    .is_some_and(|op| op.eq_ignore_ascii_case("gin_trgm_ops"));
4097                if is_trgm
4098                    && matches!(
4099                        col_ty,
4100                        spg_storage::DataType::Text | spg_storage::DataType::Varchar(_)
4101                    )
4102                {
4103                    table
4104                        .add_gin_trgm_index(stmt.name.clone(), &stmt.column)
4105                        .map_err(EngineError::Storage)?;
4106                } else if col_ty == spg_storage::DataType::TsVector {
4107                    table
4108                        .add_gin_index(stmt.name.clone(), &stmt.column)
4109                        .map_err(EngineError::Storage)?;
4110                } else {
4111                    // v7.9.26b BTree fallback — the catalog still
4112                    // gets an index entry on the leading column so
4113                    // pg_dump scripts that name GIN on JSONB / etc.
4114                    // load clean; query-time gain stays opt-in for
4115                    // tsvector callers.
4116                    table.add_index(stmt.name.clone(), &stmt.column)?;
4117                }
4118            }
4119        }
4120        if !included_positions.is_empty()
4121            && let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name)
4122        {
4123            idx.included_columns = included_positions;
4124        }
4125        // v6.8.1 — persist partial-index predicate. Stored as the
4126        // expression's Display form so the catalog snapshot stays
4127        // pure (storage has no spg-sql dependency). The runtime
4128        // maintenance path treats partial indexes identically to
4129        // full indexes for v6.8.1 (over-maintenance is safe; the
4130        // planner-side "use partial when query WHERE implies the
4131        // predicate" pass is STABILITY carve-out).
4132        if let Some(pred_expr) = &stmt.partial_predicate {
4133            let canonical = pred_expr.to_string();
4134            // v7.13.2 — mailrs round-6 S2. PG's `pg_trgm` uses
4135            // `CREATE INDEX … USING gin(col gin_trgm_ops) WHERE …`
4136            // routinely to slim trigram indexes. SPG now persists
4137            // the predicate for GIN / BRIN / HNSW the same way it
4138            // already does for BTree — same v6.8.1 "over-maintain
4139            // is safe; planner-side partial routing is STABILITY
4140            // carve-out" semantics. HNSW carries an additional
4141            // caveat: the predicate isn't applied at index build
4142            // time (would require per-row eval inside the NSW
4143            // construction loop), so the index oversamples; query
4144            // time the WHERE clause still filters correctly.
4145            if let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name) {
4146                idx.partial_predicate = Some(canonical);
4147            }
4148        }
4149        // v6.8.2 — persist expression index key. Same Display-form
4150        // storage; the runtime maintenance pass evaluates each
4151        // row's expression to derive the index key, but for v6.8.2
4152        // the engine falls through to the bare-column-reference
4153        // path and the expression is preserved for format-layer
4154        // round-trip + future planner work. Carved-out in
4155        // STABILITY § "Out of v6.8".
4156        if let Some(key_expr) = &stmt.expression {
4157            if matches!(
4158                stmt.method,
4159                IndexMethod::Hnsw | IndexMethod::Brin | IndexMethod::Gin
4160            ) {
4161                return Err(EngineError::Unsupported(
4162                    "Expression keys are not supported on HNSW or BRIN indexes".into(),
4163                ));
4164            }
4165            let canonical = key_expr.to_string();
4166            if let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name) {
4167                idx.expression = Some(canonical);
4168            }
4169        }
4170        // v7.9.29 — persist `is_unique` flag on the storage Index.
4171        // Combined with `partial_predicate`, INSERT enforcement
4172        // checks that no other row whose predicate evaluates true
4173        // shares the same indexed key. Parser already rejected
4174        // `UNIQUE` on HNSW / BRIN, so plain BTree here.
4175        // For multi-column UNIQUE INDEX the extras matter (the
4176        // full tuple is the uniqueness key), so resolve them to
4177        // column positions and persist on the index too.
4178        if stmt.is_unique {
4179            let mut extra_positions: alloc::vec::Vec<usize> = alloc::vec::Vec::new();
4180            for col_name in &stmt.extra_columns {
4181                let pos = table
4182                    .schema()
4183                    .columns
4184                    .iter()
4185                    .position(|c| c.name.eq_ignore_ascii_case(col_name))
4186                    .ok_or_else(|| {
4187                        EngineError::Unsupported(alloc::format!(
4188                            "UNIQUE INDEX {:?}: extra column {col_name:?} not in table {:?}",
4189                            stmt.name,
4190                            stmt.table
4191                        ))
4192                    })?;
4193                extra_positions.push(pos);
4194            }
4195            if let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name) {
4196                idx.is_unique = true;
4197                idx.extra_column_positions = extra_positions;
4198            }
4199            // At index-creation time, check the existing rows for
4200            // pre-existing duplicates that would have violated the
4201            // new constraint — otherwise CREATE UNIQUE INDEX would
4202            // silently leave duplicates in place.
4203            let snapshot_indices = table.indices().to_vec();
4204            let snapshot_rows: alloc::vec::Vec<spg_storage::Row> =
4205                table.rows().iter().cloned().collect();
4206            let snapshot_schema = table.schema().clone();
4207            let idx_ref = snapshot_indices
4208                .iter()
4209                .find(|i| i.name == stmt.name)
4210                .expect("just-added index");
4211            check_existing_unique_violation(idx_ref, &snapshot_schema, &snapshot_rows)?;
4212        }
4213        // v6.3.1 — adding an index can change the optimal plan for
4214        // any cached query that references this table.
4215        self.plan_cache.evict_referencing(&table_name);
4216        Ok(QueryResult::CommandOk {
4217            affected: 0,
4218            modified_catalog: !self.in_transaction(),
4219        })
4220    }
4221
4222    /// v7.13.3 — mailrs round-7 S9. SPG-specific reconciliation
4223    /// for `CREATE TABLE IF NOT EXISTS` when the table already
4224    /// exists. Adds missing columns + inline FKs from the new
4225    /// definition; existing columns / constraints stay untouched.
4226    /// New columns with a `NOT NULL` declaration without a
4227    /// `DEFAULT` are reported as a clear error rather than
4228    /// silently dropped — this is the "fail loud on real
4229    /// incompatibility, fail silent on schema-superset" tradeoff.
4230    fn reconcile_table_if_not_exists(
4231        &mut self,
4232        stmt: CreateTableStatement,
4233    ) -> Result<QueryResult, EngineError> {
4234        let table_name = stmt.name.clone();
4235        let clock = self.clock;
4236        let existing_col_names: alloc::collections::BTreeSet<String> = self
4237            .active_catalog()
4238            .get(&table_name)
4239            .expect("checked above")
4240            .schema()
4241            .columns
4242            .iter()
4243            .map(|c| c.name.to_ascii_lowercase())
4244            .collect();
4245        let row_count = self
4246            .active_catalog()
4247            .get(&table_name)
4248            .expect("checked above")
4249            .row_count();
4250        // Collect missing column defs in source order.
4251        let new_columns: alloc::vec::Vec<spg_sql::ast::ColumnDef> = stmt
4252            .columns
4253            .iter()
4254            .filter(|c| !existing_col_names.contains(&c.name.to_ascii_lowercase()))
4255            .cloned()
4256            .collect();
4257        for col_def in new_columns {
4258            let col_name = col_def.name.clone();
4259            let nullable = col_def.nullable;
4260            let has_default = col_def.default.is_some() || col_def.auto_increment;
4261            let col_schema = column_def_to_schema(col_def)?;
4262            let fill_value: Value = if has_default || col_schema.runtime_default.is_some() {
4263                resolve_column_default_free(&col_schema, clock)?
4264            } else if nullable || row_count == 0 {
4265                Value::Null
4266            } else {
4267                return Err(EngineError::Unsupported(alloc::format!(
4268                    "CREATE TABLE IF NOT EXISTS {table_name:?}: reconciling \
4269                     column {col_name:?} requires DEFAULT (existing rows would violate NOT NULL)"
4270                )));
4271            };
4272            let table = self
4273                .active_catalog_mut()
4274                .get_mut(&table_name)
4275                .expect("checked above");
4276            table.add_column(col_schema, fill_value);
4277        }
4278        // Resolve any newly-added inline FKs (column-level
4279        // REFERENCES forms) and install. Skip FKs whose local
4280        // columns we didn't have in the existing table.
4281        let table_cols_now = self
4282            .active_catalog()
4283            .get(&table_name)
4284            .expect("checked above")
4285            .schema()
4286            .columns
4287            .clone();
4288        for fk in stmt.foreign_keys {
4289            // Only install FKs whose every local column resolves
4290            // — older catalogs may have a column the new FK
4291            // references but not the column the new FK declares.
4292            let all_resolved = fk
4293                .columns
4294                .iter()
4295                .all(|c| table_cols_now.iter().any(|sc| sc.name.eq_ignore_ascii_case(c)));
4296            if !all_resolved {
4297                continue;
4298            }
4299            let already_present = {
4300                let table = self
4301                    .active_catalog()
4302                    .get(&table_name)
4303                    .expect("checked above");
4304                table.schema().foreign_keys.iter().any(|f| {
4305                    f.parent_table.eq_ignore_ascii_case(&fk.parent_table)
4306                        && f.local_columns.len() == fk.columns.len()
4307                })
4308            };
4309            if already_present {
4310                continue;
4311            }
4312            let storage_fk =
4313                resolve_foreign_key(&table_name, &table_cols_now, fk, self.active_catalog())?;
4314            let table = self
4315                .active_catalog_mut()
4316                .get_mut(&table_name)
4317                .expect("checked above");
4318            table.schema_mut().foreign_keys.push(storage_fk);
4319        }
4320        Ok(QueryResult::CommandOk {
4321            affected: 0,
4322            modified_catalog: !self.in_transaction(),
4323        })
4324    }
4325
4326    /// v7.14.0 — DROP TABLE handler (pg_dump / mysqldump preamble).
4327    fn exec_drop_table(
4328        &mut self,
4329        names: Vec<String>,
4330        if_exists: bool,
4331    ) -> Result<QueryResult, EngineError> {
4332        for name in names {
4333            let dropped = self.active_catalog_mut().drop_table(&name);
4334            if !dropped && !if_exists {
4335                return Err(EngineError::Storage(StorageError::TableNotFound { name }));
4336            }
4337        }
4338        Ok(QueryResult::CommandOk {
4339            affected: 0,
4340            modified_catalog: !self.in_transaction(),
4341        })
4342    }
4343
4344    /// v7.14.0 — DROP INDEX handler.
4345    fn exec_drop_index(
4346        &mut self,
4347        name: String,
4348        if_exists: bool,
4349    ) -> Result<QueryResult, EngineError> {
4350        let dropped = self.active_catalog_mut().drop_named_index(&name);
4351        if !dropped && !if_exists {
4352            return Err(EngineError::Storage(StorageError::IndexNotFound { name }));
4353        }
4354        Ok(QueryResult::CommandOk {
4355            affected: 0,
4356            modified_catalog: !self.in_transaction(),
4357        })
4358    }
4359
4360    fn exec_create_table(
4361        &mut self,
4362        stmt: CreateTableStatement,
4363    ) -> Result<QueryResult, EngineError> {
4364        if stmt.if_not_exists && self.active_catalog().get(&stmt.name).is_some() {
4365            // v7.13.3 — mailrs round-7 S9 reconciliation. PG's
4366            // semantics for `CREATE TABLE IF NOT EXISTS` is a
4367            // silent no-op when the table exists, even if the new
4368            // definition adds columns or constraints. SPG extends
4369            // this: any column in the new definition that's
4370            // missing from the existing table is added (with
4371            // DEFAULT back-fill / NULL); inline FKs likewise.
4372            // Existing columns are NOT modified. This makes
4373            // mailrs's schema layering (init-schema's `contacts`
4374            // sender-tracking table + migrate-023's CardDAV
4375            // `contacts` extension) converge correctly without
4376            // mailrs-side edits. PG users who want PG-strict
4377            // silent-no-op behaviour can use SPG's `--strict-pg`
4378            // flag (deferred to v7.14).
4379            return self.reconcile_table_if_not_exists(stmt);
4380        }
4381        let table_name = stmt.name.clone();
4382        // v7.9.13 — pluck the names of any columns marked
4383        // `PRIMARY KEY` inline so the post-create-table pass can
4384        // build an implicit BTree index. mailrs F1.
4385        let inline_pk_columns: Vec<String> = stmt
4386            .columns
4387            .iter()
4388            .filter(|c| c.is_primary_key)
4389            .map(|c| c.name.clone())
4390            .collect();
4391        // v7.9.19 — table-level constraints: PRIMARY KEY (a, b, ...)
4392        // and UNIQUE (a, b, ...). Each builds a BTree index on the
4393        // leading column (the existing single-column storage tier)
4394        // and registers a UniquenessConstraint on the schema for
4395        // INSERT-time enforcement of the full tuple. mailrs G1/G6.
4396        let cols = stmt
4397            .columns
4398            .into_iter()
4399            .map(column_def_to_schema)
4400            .collect::<Result<Vec<_>, _>>()?;
4401        // Composite NOT-NULL implication for PRIMARY KEY columns.
4402        let mut cols = cols;
4403        for tc in &stmt.table_constraints {
4404            if let spg_sql::ast::TableConstraint::PrimaryKey { columns, .. } = tc {
4405                for col_name in columns {
4406                    if let Some(col) = cols.iter_mut().find(|c| c.name == *col_name) {
4407                        col.nullable = false;
4408                    }
4409                }
4410            }
4411        }
4412        // v7.6.1 — resolve every FK in the statement against the
4413        // already-known catalog. Validates: parent table exists,
4414        // parent column names exist, arity matches, parent columns
4415        // have a PK / UNIQUE index. Self-referencing FKs (parent
4416        // table == this table) resolve against the column list we
4417        // just built — they don't need the catalog yet.
4418        let mut fks: Vec<spg_storage::ForeignKeyConstraint> =
4419            Vec::with_capacity(stmt.foreign_keys.len());
4420        for fk in stmt.foreign_keys {
4421            // v7.14.0 — when SET FOREIGN_KEY_CHECKS=0 is in effect
4422            // (mysqldump preamble + bulk imports), defer FK
4423            // resolution if the parent table isn't in the catalog
4424            // yet. The FK is queued and resolved when checks flip
4425            // back on. Self-references stay in-band (the parent is
4426            // the same as the child we're building).
4427            let needs_parent = !fk.parent_table.eq_ignore_ascii_case(&table_name);
4428            if !self.foreign_key_checks
4429                && needs_parent
4430                && self.active_catalog().get(&fk.parent_table).is_none()
4431            {
4432                self.pending_foreign_keys
4433                    .push((table_name.clone(), fk));
4434                continue;
4435            }
4436            fks.push(resolve_foreign_key(
4437                &table_name,
4438                &cols,
4439                fk,
4440                self.active_catalog(),
4441            )?);
4442        }
4443        let mut schema = TableSchema::new(table_name.clone(), cols);
4444        schema.foreign_keys = fks;
4445        // v7.9.19 — translate AST table_constraints to storage
4446        // UniquenessConstraints (column name → position) so the
4447        // INSERT enforcement helper sees positions directly.
4448        let mut uc_storage: Vec<spg_storage::UniquenessConstraint> = Vec::new();
4449        let mut check_exprs: Vec<String> = Vec::new();
4450        for tc in &stmt.table_constraints {
4451            let (is_pk, names, nnd) = match tc {
4452                spg_sql::ast::TableConstraint::PrimaryKey { columns, .. } => {
4453                    (true, columns.clone(), false)
4454                }
4455                spg_sql::ast::TableConstraint::Unique {
4456                    columns,
4457                    nulls_not_distinct,
4458                    ..
4459                } => (false, columns.clone(), *nulls_not_distinct),
4460                spg_sql::ast::TableConstraint::Check { expr, .. } => {
4461                    // v7.13.0 — collect CHECK predicate sources;
4462                    // they get attached to the schema below.
4463                    check_exprs.push(alloc::format!("{expr}"));
4464                    continue;
4465                }
4466                // v7.15.0 — plain `KEY (cols)` from MySQL inline
4467                // is NOT a uniqueness constraint; skip the UC
4468                // build path entirely. The BTree index lands in
4469                // the post-create loop below alongside the PK/UQ
4470                // implicit indexes.
4471                spg_sql::ast::TableConstraint::Index { .. } => continue,
4472            };
4473            let mut positions = Vec::with_capacity(names.len());
4474            for n in &names {
4475                let pos = schema
4476                    .columns
4477                    .iter()
4478                    .position(|c| c.name == *n)
4479                    .ok_or_else(|| {
4480                        EngineError::Unsupported(alloc::format!(
4481                            "table constraint references unknown column {n:?}"
4482                        ))
4483                    })?;
4484                positions.push(pos);
4485            }
4486            uc_storage.push(spg_storage::UniquenessConstraint {
4487                is_primary_key: is_pk,
4488                columns: positions,
4489                nulls_not_distinct: nnd,
4490            });
4491        }
4492        schema.uniqueness_constraints = uc_storage.clone();
4493        schema.checks = check_exprs;
4494        self.active_catalog_mut().create_table(schema)?;
4495        // v7.9.13 — implicit BTree per inline PK column +
4496        // v7.9.19 — implicit BTree on the leading column of every
4497        // table-level PRIMARY KEY / UNIQUE constraint.
4498        let table = self
4499            .active_catalog_mut()
4500            .get_mut(&table_name)
4501            .expect("just created");
4502        for (i, col_name) in inline_pk_columns.iter().enumerate() {
4503            let idx_name = if inline_pk_columns.len() == 1 {
4504                alloc::format!("{table_name}_pkey")
4505            } else {
4506                alloc::format!("{table_name}_pkey_{i}")
4507            };
4508            if let Err(e) = table.add_index(idx_name, col_name) {
4509                return Err(EngineError::Storage(e));
4510            }
4511        }
4512        for (i, tc) in stmt.table_constraints.iter().enumerate() {
4513            // v7.15.0 — plain KEY/INDEX rides this same loop so
4514            // the implicit BTree gets built. It carries its own
4515            // user-supplied name; PK/UQ still synthesise.
4516            let (suffix, names, explicit_name): (&str, &Vec<String>, Option<&String>) = match tc {
4517                spg_sql::ast::TableConstraint::PrimaryKey { columns, .. } => {
4518                    ("pkey", columns, None)
4519                }
4520                spg_sql::ast::TableConstraint::Unique { columns, .. } => ("key", columns, None),
4521                spg_sql::ast::TableConstraint::Index { name, columns } => {
4522                    ("idx", columns, name.as_ref())
4523                }
4524                spg_sql::ast::TableConstraint::Check { .. } => continue,
4525            };
4526            let leading = &names[0];
4527            // Skip if a same-column BTree already exists (e.g.
4528            // inline PK on the leading column).
4529            let already = table.indices().iter().any(|idx| {
4530                matches!(idx.kind, spg_storage::IndexKind::BTree(_))
4531                    && table.schema().columns[idx.column_position].name == *leading
4532            });
4533            if already {
4534                continue;
4535            }
4536            let idx_name = if let Some(n) = explicit_name {
4537                n.clone()
4538            } else if names.len() == 1 {
4539                alloc::format!("{table_name}_{leading}_{suffix}")
4540            } else {
4541                alloc::format!("{table_name}_{leading}_{suffix}_{i}")
4542            };
4543            if let Err(e) = table.add_index(idx_name, leading) {
4544                return Err(EngineError::Storage(e));
4545            }
4546        }
4547        Ok(QueryResult::CommandOk {
4548            affected: 0,
4549            modified_catalog: !self.in_transaction(),
4550        })
4551    }
4552
4553    fn exec_insert(&mut self, stmt: InsertStatement) -> Result<QueryResult, EngineError> {
4554        // v7.13.0 — `INSERT INTO t [(cols)] SELECT …` (mailrs
4555        // round-5 G4). Execute the inner SELECT first, then route
4556        // back through the regular VALUES code path with the
4557        // materialised rows.
4558        if let Some(select) = stmt.select_source.clone() {
4559            let select_result = self.exec_select_cancel(&select, CancelToken::none())?;
4560            let rows = match select_result {
4561                QueryResult::Rows { rows, .. } => rows,
4562                other => {
4563                    return Err(EngineError::Unsupported(alloc::format!(
4564                        "INSERT … SELECT: inner statement produced {other:?} instead of a row set"
4565                    )));
4566                }
4567            };
4568            let mut materialised: Vec<Vec<Expr>> = Vec::with_capacity(rows.len());
4569            for row in rows {
4570                let mut tuple: Vec<Expr> = Vec::with_capacity(row.values.len());
4571                for v in row.values {
4572                    tuple.push(value_to_literal_expr_permissive(v)?);
4573                }
4574                materialised.push(tuple);
4575            }
4576            let recurse = InsertStatement {
4577                table: stmt.table,
4578                columns: stmt.columns,
4579                rows: materialised,
4580                select_source: None,
4581                on_conflict: stmt.on_conflict,
4582                returning: stmt.returning,
4583            };
4584            return self.exec_insert(recurse);
4585        }
4586        // v7.9.21 — snapshot the clock fn pointer before the mut
4587        // borrow on the catalog opens; runtime DEFAULT eval needs
4588        // it inside the row hot loop.
4589        let clock = self.clock;
4590        // v7.12.4 — snapshot row-level triggers + their referenced
4591        // functions before the mut borrow on the catalog opens.
4592        // Cloned out so the row hot loop can fire them without
4593        // re-borrowing the catalog (which would conflict with
4594        // table.insert's mutable borrow).
4595        let before_insert_triggers = self.snapshot_row_triggers(&stmt.table, "INSERT", "BEFORE");
4596        let after_insert_triggers = self.snapshot_row_triggers(&stmt.table, "INSERT", "AFTER");
4597        let trigger_session_cfg: Option<alloc::string::String> = self
4598            .session_params
4599            .get("default_text_search_config")
4600            .cloned();
4601        let table = self
4602            .active_catalog_mut()
4603            .get_mut(&stmt.table)
4604            .ok_or_else(|| {
4605                EngineError::Storage(StorageError::TableNotFound {
4606                    name: stmt.table.clone(),
4607                })
4608            })?;
4609        // v3.1.5: clone the columns vector only (not the whole
4610        // TableSchema — saves one String alloc for the table name).
4611        // We need an owned snapshot because we'll call `table.insert`
4612        // (mutable borrow on `table`) inside the row loop while
4613        // reading schema fields.
4614        let column_meta: Vec<ColumnSchema> = table.schema().columns.clone();
4615        let schema_cols_len = column_meta.len();
4616        // Build a permutation `tuple_pos[c] = Some(j)` meaning schema
4617        // column `c` is filled from the `j`-th tuple slot; `None` means
4618        // "fill with NULL". Validated once and reused for every row.
4619        let tuple_pos: Option<Vec<Option<usize>>> = match &stmt.columns {
4620            None => None, // 1-1 mapping, fast path
4621            Some(cols) => {
4622                let mut map = alloc::vec![None; schema_cols_len];
4623                for (j, name) in cols.iter().enumerate() {
4624                    let idx = column_meta
4625                        .iter()
4626                        .position(|c| c.name == *name)
4627                        .ok_or_else(|| {
4628                            EngineError::Eval(EvalError::ColumnNotFound { name: name.clone() })
4629                        })?;
4630                    if map[idx].is_some() {
4631                        return Err(EngineError::Storage(StorageError::ArityMismatch {
4632                            expected: schema_cols_len,
4633                            actual: cols.len(),
4634                        }));
4635                    }
4636                    map[idx] = Some(j);
4637                }
4638                // Omitted columns must either be nullable, carry a
4639                // DEFAULT, or be AUTO_INCREMENT. Catch NOT NULL
4640                // omissions up front so the WAL stays clean.
4641                for (i, col) in column_meta.iter().enumerate() {
4642                    if map[i].is_none()
4643                        && !col.nullable
4644                        && col.default.is_none()
4645                        && col.runtime_default.is_none()
4646                        && !col.auto_increment
4647                    {
4648                        return Err(EngineError::Storage(StorageError::NullInNotNull {
4649                            column: col.name.clone(),
4650                        }));
4651                    }
4652                }
4653                Some(map)
4654            }
4655        };
4656        let expected_tuple_len = stmt.columns.as_ref().map_or(schema_cols_len, Vec::len);
4657        // v7.6.2 — snapshot this table's FK list before the
4658        // mutable-borrow window so we can run parent lookups
4659        // against the immutable catalog after parsing. Empty vec is
4660        // the no-FK fast path; clone cost is O(fks * arity) which
4661        // is < 100 ns for typical schemas.
4662        let fks = table.schema().foreign_keys.clone();
4663        let mut affected = 0usize;
4664        // Stage 1 — parse + AUTO_INC + coerce all rows under the
4665        // single mutable borrow.
4666        let mut all_values: Vec<Vec<Value>> = Vec::with_capacity(stmt.rows.len());
4667        for tuple in stmt.rows {
4668            if tuple.len() != expected_tuple_len {
4669                return Err(EngineError::Storage(StorageError::ArityMismatch {
4670                    expected: expected_tuple_len,
4671                    actual: tuple.len(),
4672                }));
4673            }
4674            // Fast path: no column-list permutation → tuple slot j
4675            // maps to schema column j. We can zip schema with tuple
4676            // and skip the `raw_tuple` staging allocation entirely.
4677            let values: Vec<Value> = if let Some(map) = &tuple_pos {
4678                // Permuted path: still need raw_tuple to index by `map[i]`.
4679                let raw_tuple: Vec<Value> = tuple
4680                    .into_iter()
4681                    .map(literal_expr_to_value)
4682                    .collect::<Result<_, _>>()?;
4683                let mut out = Vec::with_capacity(schema_cols_len);
4684                for (i, col) in column_meta.iter().enumerate() {
4685                    let mut raw = match map[i] {
4686                        Some(j) => raw_tuple[j].clone(),
4687                        None => resolve_column_default_free(col, clock)?,
4688                    };
4689                    if col.auto_increment && raw.is_null() {
4690                        let next = table.next_auto_value(i).ok_or_else(|| {
4691                            EngineError::Unsupported(alloc::format!(
4692                                "AUTO_INCREMENT applies to integer columns only (column `{}`)",
4693                                col.name
4694                            ))
4695                        })?;
4696                        raw = Value::BigInt(next);
4697                    }
4698                    out.push(coerce_value(raw, col.ty, &col.name, i)?);
4699                }
4700                out
4701            } else {
4702                // 1-1 mapping fast path: single Vec alloc, no raw_tuple.
4703                let mut out = Vec::with_capacity(schema_cols_len);
4704                for (i, (col, expr)) in column_meta.iter().zip(tuple).enumerate() {
4705                    let mut raw = literal_expr_to_value(expr)?;
4706                    if col.auto_increment && raw.is_null() {
4707                        let next = table.next_auto_value(i).ok_or_else(|| {
4708                            EngineError::Unsupported(alloc::format!(
4709                                "AUTO_INCREMENT applies to integer columns only (column `{}`)",
4710                                col.name
4711                            ))
4712                        })?;
4713                        raw = Value::BigInt(next);
4714                    }
4715                    out.push(coerce_value(raw, col.ty, &col.name, i)?);
4716                }
4717                out
4718            };
4719            all_values.push(values);
4720        }
4721        // Stage 2 — FK enforcement on the immutable catalog.
4722        // Non-lexical lifetimes release the mutable borrow on
4723        // `table` here since stage 1 was the last use. The
4724        // parent-table lookup runs before any row is committed.
4725        let uniqueness = table.schema().uniqueness_constraints.clone();
4726        let _ = table;
4727        if !fks.is_empty() {
4728            enforce_fk_inserts(self.active_catalog(), &stmt.table, &fks, &all_values)?;
4729        }
4730        // v7.13.0 — CHECK constraint enforcement (mailrs round-5 G3).
4731        enforce_check_constraints(self.active_catalog(), &stmt.table, &all_values)?;
4732        // v7.9.19 — composite UNIQUE / PRIMARY KEY enforcement.
4733        enforce_uniqueness_inserts(self.active_catalog(), &stmt.table, &uniqueness, &all_values)?;
4734        // v7.9.29 — CREATE UNIQUE INDEX [WHERE pred] enforcement.
4735        // Independent of table-level UniquenessConstraint (which
4736        // can't carry a predicate). Walks the table's indexes;
4737        // for each `is_unique` index, only rows whose
4738        // partial_predicate evaluates truthy are checked for
4739        // collision. mailrs K1.
4740        enforce_unique_index_inserts(self.active_catalog(), &stmt.table, &all_values)?;
4741        // v7.9.8 / v7.9.9 — ON CONFLICT handling.
4742        //   - `DO NOTHING` filters `all_values` to non-conflicting
4743        //     rows + drops within-batch duplicates.
4744        //   - `DO UPDATE SET …` ALSO filters, but for each
4745        //     conflicting row it queues an UPDATE on the existing
4746        //     row using the incoming row's values as `EXCLUDED.*`.
4747        let mut pending_updates: Vec<(usize, Vec<Value>)> = Vec::new();
4748        let mut skipped_count = 0usize;
4749        if let Some(clause) = &stmt.on_conflict {
4750            let conflict_cols = resolve_on_conflict_columns(
4751                self.active_catalog(),
4752                &stmt.table,
4753                clause.target_columns.as_slice(),
4754            )?;
4755            let mut kept: Vec<Vec<Value>> = Vec::with_capacity(all_values.len());
4756            let mut seen_keys: Vec<Vec<Value>> = Vec::new();
4757            for values in all_values {
4758                let key_tuple: Vec<&Value> = conflict_cols.iter().map(|&c| &values[c]).collect();
4759                // SQL spec: NULL in any conflict column means "no
4760                // conflict possible" (NULL ≠ NULL for uniqueness).
4761                let has_null_key = key_tuple.iter().any(|v| matches!(v, Value::Null));
4762                let collides_with_table = !has_null_key
4763                    && on_conflict_keys_exist(
4764                        self.active_catalog(),
4765                        &stmt.table,
4766                        &conflict_cols,
4767                        &key_tuple,
4768                    );
4769                let key_tuple_owned: Vec<Value> = key_tuple.iter().map(|v| (*v).clone()).collect();
4770                let collides_with_batch =
4771                    !has_null_key && seen_keys.iter().any(|k| k == &key_tuple_owned);
4772                let collides = collides_with_table || collides_with_batch;
4773                match (&clause.action, collides) {
4774                    (_, false) => {
4775                        seen_keys.push(key_tuple_owned);
4776                        kept.push(values);
4777                    }
4778                    (spg_sql::ast::OnConflictAction::Nothing, true) => {
4779                        skipped_count += 1;
4780                    }
4781                    (
4782                        spg_sql::ast::OnConflictAction::Update {
4783                            assignments,
4784                            where_,
4785                        },
4786                        true,
4787                    ) => {
4788                        if !collides_with_table {
4789                            skipped_count += 1;
4790                            continue;
4791                        }
4792                        let target_pos = lookup_row_position_by_keys(
4793                            self.active_catalog(),
4794                            &stmt.table,
4795                            &conflict_cols,
4796                            &key_tuple,
4797                        )
4798                        .ok_or_else(|| {
4799                            EngineError::Unsupported(
4800                                "ON CONFLICT DO UPDATE: conflict detected but row \
4801                                 position could not be resolved (cold-tier row?)"
4802                                    .into(),
4803                            )
4804                        })?;
4805                        let updated = apply_on_conflict_assignments(
4806                            self.active_catalog(),
4807                            &stmt.table,
4808                            target_pos,
4809                            &values,
4810                            assignments,
4811                            where_.as_ref(),
4812                        )?;
4813                        if let Some(new_row) = updated {
4814                            pending_updates.push((target_pos, new_row));
4815                        } else {
4816                            skipped_count += 1;
4817                        }
4818                    }
4819                }
4820            }
4821            all_values = kept;
4822        }
4823        // Stage 3 — insert all rows under a fresh mutable borrow.
4824        let table = self
4825            .active_catalog_mut()
4826            .get_mut(&stmt.table)
4827            .ok_or_else(|| {
4828                EngineError::Storage(StorageError::TableNotFound {
4829                    name: stmt.table.clone(),
4830                })
4831            })?;
4832        // v7.9.4 — keep RETURNING projection rows separate per
4833        // INSERT and per UPDATE branch so DO UPDATE pushes the new
4834        // post-update state, not the incoming-only values.
4835        let mut returning_rows: Vec<Vec<Value>> = Vec::new();
4836        // v7.12.7 — collect embedded SQL emitted by any trigger
4837        // fire across the row loop; engine drains the queue after
4838        // the table mut borrow drops.
4839        let mut deferred_embedded: Vec<triggers::DeferredEmbeddedStmt> = Vec::new();
4840        'rowloop: for values in all_values {
4841            let mut row = Row::new(values);
4842            // v7.12.4 — BEFORE INSERT row-level triggers. Each
4843            // trigger may rewrite NEW cells (e.g. populate
4844            // `search_vector := to_tsvector(...)`) and may return
4845            // NULL to skip the row entirely.
4846            for fd in &before_insert_triggers {
4847                let (outcome, deferred) = triggers::fire_row_trigger(
4848                    fd,
4849                    Some(row.clone()),
4850                    None,
4851                    &stmt.table,
4852                    &column_meta,
4853                    &[],
4854                    trigger_session_cfg.as_deref(),
4855                    false,
4856                )
4857                .map_err(|e| EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}"))))?;
4858                deferred_embedded.extend(deferred);
4859                match outcome {
4860                    triggers::TriggerOutcome::Row(r) => row = r,
4861                    triggers::TriggerOutcome::Skip => continue 'rowloop,
4862                }
4863            }
4864            if stmt.returning.is_some() {
4865                returning_rows.push(row.values.clone());
4866            }
4867            // v7.12.4 — clone for the AFTER trigger view; insert
4868            // moves the row into the table.
4869            let inserted = row.clone();
4870            table.insert(row)?;
4871            affected += 1;
4872            // v7.12.4 — AFTER INSERT row-level triggers fire post-
4873            // write. Return value is ignored (PG semantics); we
4874            // surface any error from the body up to the caller.
4875            for fd in &after_insert_triggers {
4876                let (_outcome, deferred) = triggers::fire_row_trigger(
4877                    fd,
4878                    Some(inserted.clone()),
4879                    None,
4880                    &stmt.table,
4881                    &column_meta,
4882                    &[],
4883                    trigger_session_cfg.as_deref(),
4884                    true,
4885                )
4886                .map_err(|e| EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}"))))?;
4887                deferred_embedded.extend(deferred);
4888            }
4889        }
4890        // v7.9.9 — apply ON CONFLICT DO UPDATE rewrites collected
4891        // in the conflict-resolution pass. update_row handles
4892        // index maintenance + body re-encoding.
4893        for (pos, new_row) in pending_updates {
4894            if stmt.returning.is_some() {
4895                returning_rows.push(new_row.clone());
4896            }
4897            table.update_row(pos, new_row)?;
4898            affected += 1;
4899        }
4900        let _ = skipped_count;
4901        // v7.12.7 — drop the table mut borrow and drain any
4902        // trigger-emitted embedded SQL queued during this INSERT.
4903        // The borrow has to release first because each deferred
4904        // stmt may UPDATE / INSERT / DELETE the same (or another)
4905        // table — including, in principle, this one.
4906        let _ = table;
4907        self.execute_deferred_trigger_stmts(deferred_embedded, CancelToken::none())?;
4908        // v7.9.4/v7.9.9 — RETURNING streams the rows that ended
4909        // up in the table after this statement (insert or
4910        // post-update on conflict).
4911        if let Some(items) = &stmt.returning {
4912            return self.build_returning_rows(&stmt.table, items, returning_rows);
4913        }
4914        // v6.2.1 — auto-analyze: track per-table modified-row
4915        // counter so the background sweep can decide when to
4916        // re-ANALYZE. Cheap path on the autocommit-wrap hot loop
4917        // — one BTreeMap entry update per INSERT batch.
4918        if !self.in_transaction() && affected > 0 {
4919            self.statistics
4920                .record_modifications(&stmt.table, affected as u64);
4921        }
4922        Ok(QueryResult::CommandOk {
4923            affected,
4924            modified_catalog: !self.in_transaction(),
4925        })
4926    }
4927
4928    /// v4.5: SELECT with cooperative cancellation. The token is
4929    /// honoured between UNION peers and inside the bare-SELECT row
4930    /// loop; HNSW kNN graph walks and the aggregate executor don't
4931    /// honour it yet (deferred — those paths bound their work
4932    /// internally by `LIMIT k` and `GROUP BY` cardinality).
4933    /// v6.10.2 — cold-tier time-travel scan. Resolves the segment
4934    /// by id, decodes each row body against the table's current
4935    /// schema, applies the SELECT's projection + optional WHERE +
4936    /// optional LIMIT, returns a `Rows` result. JOINs / aggregates
4937    /// / ORDER BY are unsupported on this path (STABILITY carve-
4938    /// out); operators wanting them should restore the segment
4939    /// into a regular table first.
4940    fn exec_select_as_of_segment(
4941        &self,
4942        stmt: &SelectStatement,
4943        from: &spg_sql::ast::FromClause,
4944        segment_id: u32,
4945    ) -> Result<QueryResult, EngineError> {
4946        // v6.10.2 scope: no joins, no aggregates, no ORDER BY,
4947        // no GROUP BY / HAVING / UNION / OFFSET / DISTINCT.
4948        if !from.joins.is_empty()
4949            || stmt.group_by.is_some()
4950            || stmt.having.is_some()
4951            || !stmt.unions.is_empty()
4952            || !stmt.order_by.is_empty()
4953            || stmt.offset.is_some()
4954            || stmt.distinct
4955            || aggregate::uses_aggregate(stmt)
4956        {
4957            return Err(EngineError::Unsupported(
4958                "AS OF SEGMENT supports SELECT projection + WHERE + LIMIT only \
4959                 (joins / aggregates / ORDER BY are STABILITY § \"Out of v6.10\")"
4960                    .into(),
4961            ));
4962        }
4963        let table = self
4964            .active_catalog()
4965            .get(&from.primary.name)
4966            .ok_or_else(|| StorageError::TableNotFound {
4967                name: from.primary.name.clone(),
4968            })?;
4969        let schema = table.schema().clone();
4970        let schema_cols = &schema.columns;
4971        let alias = from
4972            .primary
4973            .alias
4974            .as_deref()
4975            .unwrap_or(from.primary.name.as_str());
4976        let ctx = EvalContext::new(schema_cols, Some(alias));
4977        let seg = self
4978            .active_catalog()
4979            .cold_segment(segment_id)
4980            .ok_or_else(|| {
4981                EngineError::Unsupported(alloc::format!(
4982                    "AS OF SEGMENT: cold segment {segment_id} not registered"
4983                ))
4984            })?;
4985        let mut out_rows: Vec<Row> = Vec::new();
4986        let mut limit_remaining: Option<usize> =
4987            stmt.limit_literal().and_then(|n| usize::try_from(n).ok());
4988        for (_key, body) in seg.scan() {
4989            let (row, _consumed) =
4990                spg_storage::decode_row_body_dense(&body, &schema).map_err(EngineError::Storage)?;
4991            if let Some(where_expr) = &stmt.where_ {
4992                let cond = self.eval_expr_simple(where_expr, &row, &ctx)?;
4993                if !matches!(cond, Value::Bool(true)) {
4994                    continue;
4995                }
4996            }
4997            // Projection.
4998            let projected = self.project_row_simple(&row, &stmt.items, schema_cols, alias)?;
4999            out_rows.push(projected);
5000            if let Some(rem) = limit_remaining.as_mut() {
5001                if *rem == 0 {
5002                    out_rows.pop();
5003                    break;
5004                }
5005                *rem -= 1;
5006            }
5007        }
5008        // Output column schema: derive from SELECT items.
5009        let columns = self.derive_output_columns(&stmt.items, schema_cols, alias);
5010        Ok(QueryResult::Rows {
5011            columns,
5012            rows: out_rows,
5013        })
5014    }
5015
5016    /// v6.10.2 — simple-path WHERE eval that doesn't go through
5017    /// the correlated-subquery / Memoize machinery. AS OF SEGMENT
5018    /// scan paths predicate against a snapshot frozen segment, no
5019    /// cross-row state.
5020    fn eval_expr_simple(
5021        &self,
5022        expr: &Expr,
5023        row: &Row,
5024        ctx: &EvalContext,
5025    ) -> Result<Value, EngineError> {
5026        let cancel = CancelToken::none();
5027        self.eval_expr_with_correlated(expr, row, ctx, cancel, None)
5028    }
5029
5030    /// v7.9.4 — INSERT / UPDATE / DELETE RETURNING projector.
5031    /// Given the table name, the user-supplied projection items,
5032    /// and the mutated rows (post-insert / post-update values, or
5033    /// pre-delete snapshot), build a `QueryResult::Rows` whose
5034    /// schema describes the projected columns. Mailrs migration
5035    /// blocker #1.
5036    fn build_returning_rows(
5037        &self,
5038        table_name: &str,
5039        items: &[SelectItem],
5040        mutated_rows: Vec<Vec<Value>>,
5041    ) -> Result<QueryResult, EngineError> {
5042        let table = self.active_catalog().get(table_name).ok_or_else(|| {
5043            EngineError::Storage(StorageError::TableNotFound {
5044                name: table_name.into(),
5045            })
5046        })?;
5047        let schema_cols = table.schema().columns.clone();
5048        let columns = self.derive_output_columns(items, &schema_cols, table_name);
5049        let mut out_rows: Vec<Row> = Vec::with_capacity(mutated_rows.len());
5050        for values in mutated_rows {
5051            let row = Row::new(values);
5052            let projected = self.project_row_simple(&row, items, &schema_cols, table_name)?;
5053            out_rows.push(projected);
5054        }
5055        Ok(QueryResult::Rows {
5056            columns,
5057            rows: out_rows,
5058        })
5059    }
5060
5061    /// v6.10.2 — projection for AS OF SEGMENT. Resolves
5062    /// `SelectItem::Wildcard` to all schema columns and
5063    /// `SelectItem::Expr` via the regular eval path.
5064    fn project_row_simple(
5065        &self,
5066        row: &Row,
5067        items: &[SelectItem],
5068        schema_cols: &[ColumnSchema],
5069        alias: &str,
5070    ) -> Result<Row, EngineError> {
5071        let ctx = EvalContext::new(schema_cols, Some(alias));
5072        let cancel = CancelToken::none();
5073        let mut out_vals = Vec::new();
5074        for item in items {
5075            match item {
5076                SelectItem::Wildcard => {
5077                    out_vals.extend(row.values.iter().cloned());
5078                }
5079                SelectItem::Expr { expr, .. } => {
5080                    let v = self.eval_expr_with_correlated(expr, row, &ctx, cancel, None)?;
5081                    out_vals.push(v);
5082                }
5083            }
5084        }
5085        Ok(Row::new(out_vals))
5086    }
5087
5088    /// v6.10.2 — derive the output `ColumnSchema` list for an
5089    /// AS OF SEGMENT projection. Wildcards take the full schema;
5090    /// expressions take the alias if present or a synthetic
5091    /// `?column?` (PG convention) otherwise.
5092    fn derive_output_columns(
5093        &self,
5094        items: &[SelectItem],
5095        schema_cols: &[ColumnSchema],
5096        _alias: &str,
5097    ) -> Vec<ColumnSchema> {
5098        let mut out = Vec::new();
5099        for item in items {
5100            match item {
5101                SelectItem::Wildcard => {
5102                    out.extend(schema_cols.iter().cloned());
5103                }
5104                SelectItem::Expr { alias, .. } => {
5105                    let name = alias.clone().unwrap_or_else(|| "?column?".to_string());
5106                    // Default to Text; the caller's row values
5107                    // carry the actual type. v6.10.2 scope.
5108                    out.push(ColumnSchema::new(name, DataType::Text, true));
5109                }
5110            }
5111        }
5112        out
5113    }
5114
5115    fn exec_select_cancel(
5116        &self,
5117        stmt: &SelectStatement,
5118        cancel: CancelToken<'_>,
5119    ) -> Result<QueryResult, EngineError> {
5120        cancel.check()?;
5121        // v6.10.2 — cold-tier time-travel short-circuit. When the
5122        // primary TableRef carries `AS OF SEGMENT '<id>'`, run a
5123        // dedicated cold-segment scan instead of the regular
5124        // hot+index path. The scope is intentionally narrow for
5125        // v6.10.2 — bare `SELECT * FROM <t> AS OF SEGMENT 'id'`,
5126        // optionally with a single-column-equality WHERE. JOINs /
5127        // aggregates / ORDER BY / subqueries on top of a time-
5128        // travelled scan are STABILITY § "Out of v6.10".
5129        if let Some(from) = &stmt.from
5130            && let Some(seg_id) = from.primary.as_of_segment
5131        {
5132            return self.exec_select_as_of_segment(stmt, from, seg_id);
5133        }
5134        // v6.2.0 / v6.5.0 — virtual-table short-circuits. Detected
5135        // pre-CTE because they don't read from the catalog and
5136        // shouldn't participate in regular FROM resolution.
5137        if let Some(from) = &stmt.from
5138            && from.joins.is_empty()
5139            && stmt.where_.is_none()
5140            && stmt.group_by.is_none()
5141            && stmt.having.is_none()
5142            && stmt.unions.is_empty()
5143            && stmt.order_by.is_empty()
5144            && stmt.limit.is_none()
5145            && stmt.offset.is_none()
5146            && !stmt.distinct
5147            && stmt.items.iter().all(|i| matches!(i, SelectItem::Wildcard))
5148        {
5149            let lower = from.primary.name.to_ascii_lowercase();
5150            match lower.as_str() {
5151                "spg_statistic" => return Ok(self.exec_spg_statistic()),
5152                // v6.5.0 — observability v2 virtual tables.
5153                "spg_stat_replication" => return Ok(self.exec_spg_stat_replication()),
5154                "spg_stat_segment" => return Ok(self.exec_spg_stat_segment()),
5155                "spg_stat_query" => return Ok(self.exec_spg_stat_query()),
5156                "spg_stat_activity" => return Ok(self.exec_spg_stat_activity()),
5157                "spg_audit_chain" => return Ok(self.exec_spg_audit_chain()),
5158                "spg_audit_verify" => return Ok(self.exec_spg_audit_verify()),
5159                "spg_table_ddl" => return Ok(self.exec_spg_table_ddl()),
5160                "spg_role_ddl" => return Ok(self.exec_spg_role_ddl()),
5161                "spg_database_ddl" => return Ok(self.exec_spg_database_ddl()),
5162                _ => {}
5163            }
5164        }
5165        // v4.11: CTEs materialise into a temporary enriched catalog
5166        // *before* anything else — the body SELECT can then refer
5167        // to CTE names via the regular FROM-clause resolution.
5168        // Uncorrelated only: each CTE body runs once against the
5169        // current catalog, not against later CTEs' results (left-
5170        // to-right materialisation would relax this, but we keep
5171        // it simple for v4.11 MVP).
5172        if !stmt.ctes.is_empty() {
5173            return self.exec_with_ctes(stmt, cancel);
5174        }
5175        // v4.10: subqueries (uncorrelated) are resolved here, before
5176        // the executor sees the row loop. We clone the statement so
5177        // we can mutate without disturbing the caller's AST — most
5178        // queries pass through with no subquery nodes and the clone
5179        // is cheap; with subqueries the materialisation cost
5180        // dominates anyway.
5181        let mut stmt_owned;
5182        let stmt_ref: &SelectStatement = if expr_tree_has_subquery(stmt) {
5183            stmt_owned = stmt.clone();
5184            self.resolve_select_subqueries(&mut stmt_owned, cancel)?;
5185            &stmt_owned
5186        } else {
5187            stmt
5188        };
5189        if stmt_ref.unions.is_empty() {
5190            return self.exec_bare_select_cancel(stmt_ref, cancel);
5191        }
5192        // UNION path: clone-strip the head into a bare block (its own
5193        // DISTINCT and any inner ORDER BY are dropped by parser rule —
5194        // the wrapper SelectStatement carries them), execute, then chain
5195        // peers with left-associative dedup semantics.
5196        let mut head = stmt_ref.clone();
5197        head.unions = Vec::new();
5198        head.order_by = Vec::new();
5199        head.limit = None;
5200        let QueryResult::Rows { columns, mut rows } =
5201            self.exec_bare_select_cancel(&head, cancel)?
5202        else {
5203            unreachable!("bare SELECT cannot return CommandOk")
5204        };
5205        for (kind, peer) in &stmt_ref.unions {
5206            let QueryResult::Rows {
5207                columns: peer_cols,
5208                rows: peer_rows,
5209            } = self.exec_bare_select_cancel(peer, cancel)?
5210            else {
5211                unreachable!("bare SELECT cannot return CommandOk")
5212            };
5213            if peer_cols.len() != columns.len() {
5214                return Err(EngineError::Unsupported(alloc::format!(
5215                    "UNION arity mismatch: head has {} columns, peer has {}",
5216                    columns.len(),
5217                    peer_cols.len()
5218                )));
5219            }
5220            rows.extend(peer_rows);
5221            if matches!(kind, UnionKind::Distinct) {
5222                rows = dedup_rows(rows);
5223            }
5224        }
5225        // ORDER BY at the top of a UNION applies to the combined result.
5226        // Eval against the projected schema (NOT the source table).
5227        if !stmt.order_by.is_empty() {
5228            let synth_ctx = EvalContext::new(&columns, None);
5229            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
5230            let mut tagged: Vec<(Vec<f64>, Row)> = Vec::with_capacity(rows.len());
5231            for r in rows {
5232                let keys = build_order_keys(&stmt.order_by, &r, &synth_ctx)?;
5233                tagged.push((keys, r));
5234            }
5235            sort_by_keys(&mut tagged, &descs);
5236            rows = tagged.into_iter().map(|(_, r)| r).collect();
5237        }
5238        apply_offset_and_limit(&mut rows, stmt.offset_literal(), stmt.limit_literal());
5239        Ok(QueryResult::Rows { columns, rows })
5240    }
5241
5242    #[allow(clippy::too_many_lines)]
5243    #[allow(clippy::too_many_lines)] // huge match — splitting fragments the planner
5244    /// v7.11.7 — execute `SELECT … FROM unnest(expr) [AS] alias …`.
5245    /// Synthesises a single-column virtual table whose column type
5246    /// is TEXT and whose rows are the array elements. Routes
5247    /// through the regular projection / WHERE / ORDER BY / LIMIT
5248    /// machinery so set-returning UNNEST composes naturally with
5249    /// the rest of the SELECT surface.
5250    fn exec_select_unnest(
5251        &self,
5252        stmt: &SelectStatement,
5253        primary: &TableRef,
5254        cancel: CancelToken<'_>,
5255    ) -> Result<QueryResult, EngineError> {
5256        let expr = primary
5257            .unnest_expr
5258            .as_deref()
5259            .expect("caller guards unnest_expr.is_some()");
5260        // Evaluate the array expression once. Empty schema / empty
5261        // row — uncorrelated UNNEST cannot reference outer columns.
5262        let empty_schema: alloc::vec::Vec<ColumnSchema> = alloc::vec::Vec::new();
5263        let ctx = EvalContext::new(&empty_schema, None);
5264        let dummy_row = Row::new(alloc::vec::Vec::new());
5265        // v7.11.13 — unnest dispatches per array element type so
5266        // INT[] / BIGINT[] surface their PG types in projection.
5267        let (elem_dtype, rows): (DataType, alloc::vec::Vec<Row>) =
5268            match eval::eval_expr(expr, &dummy_row, &ctx).map_err(EngineError::Eval)? {
5269                Value::Null => (DataType::Text, alloc::vec::Vec::new()),
5270                Value::TextArray(items) => {
5271                    let rows = items
5272                        .into_iter()
5273                        .map(|item| {
5274                            Row::new(alloc::vec![match item {
5275                                Some(s) => Value::Text(s),
5276                                None => Value::Null,
5277                            }])
5278                        })
5279                        .collect();
5280                    (DataType::Text, rows)
5281                }
5282                Value::IntArray(items) => {
5283                    let rows = items
5284                        .into_iter()
5285                        .map(|item| {
5286                            Row::new(alloc::vec![match item {
5287                                Some(n) => Value::Int(n),
5288                                None => Value::Null,
5289                            }])
5290                        })
5291                        .collect();
5292                    (DataType::Int, rows)
5293                }
5294                Value::BigIntArray(items) => {
5295                    let rows = items
5296                        .into_iter()
5297                        .map(|item| {
5298                            Row::new(alloc::vec![match item {
5299                                Some(n) => Value::BigInt(n),
5300                                None => Value::Null,
5301                            }])
5302                        })
5303                        .collect();
5304                    (DataType::BigInt, rows)
5305                }
5306                other => {
5307                    return Err(EngineError::Unsupported(alloc::format!(
5308                        "unnest() expects an array argument, got {:?}",
5309                        other.data_type()
5310                    )));
5311                }
5312            };
5313        let alias = primary
5314            .alias
5315            .clone()
5316            .unwrap_or_else(|| "unnest".to_string());
5317        // v7.13.2 — mailrs round-6 S5. Honour PG-standard
5318        // `UNNEST(arr) AS p(col_name)` column-list aliasing: the
5319        // first entry overrides the projected column's name.
5320        // Without the column list, fall back to the table alias
5321        // (pre-v7.13.2 behaviour).
5322        let col_name = primary
5323            .unnest_column_aliases
5324            .first()
5325            .cloned()
5326            .unwrap_or_else(|| alias.clone());
5327        let col_schema = ColumnSchema::new(col_name, elem_dtype, true);
5328        let schema_cols = alloc::vec![col_schema.clone()];
5329        let scan_ctx = EvalContext::new(&schema_cols, Some(&alias));
5330        // Apply WHERE.
5331        let filtered: alloc::vec::Vec<Row> = if let Some(w) = &stmt.where_ {
5332            let mut out = alloc::vec::Vec::with_capacity(rows.len());
5333            for row in rows {
5334                cancel.check()?;
5335                let v = eval::eval_expr(w, &row, &scan_ctx).map_err(EngineError::Eval)?;
5336                if matches!(v, Value::Bool(true)) {
5337                    out.push(row);
5338                }
5339            }
5340            out
5341        } else {
5342            rows
5343        };
5344        // Projection.
5345        let projection = build_projection(&stmt.items, &schema_cols, &alias)?;
5346        let mut projected_rows: alloc::vec::Vec<Row> =
5347            alloc::vec::Vec::with_capacity(filtered.len());
5348        for row in &filtered {
5349            let mut vals = alloc::vec::Vec::with_capacity(projection.len());
5350            for p in &projection {
5351                vals.push(eval::eval_expr(&p.expr, row, &scan_ctx).map_err(EngineError::Eval)?);
5352            }
5353            projected_rows.push(Row::new(vals));
5354        }
5355        // ORDER BY / LIMIT — apply on the projected rows (cheap;
5356        // unnest result sets are small by design).
5357        let columns: alloc::vec::Vec<ColumnSchema> = projection
5358            .iter()
5359            .map(|p| ColumnSchema::new(p.output_name.clone(), p.ty, p.nullable))
5360            .collect();
5361        // Re-evaluate ORDER BY against the source schema (pre-projection
5362        // so col refs by name still resolve through `scan_ctx`).
5363        if !stmt.order_by.is_empty() {
5364            let mut indexed: alloc::vec::Vec<(usize, Vec<Value>)> = filtered
5365                .iter()
5366                .enumerate()
5367                .map(|(i, r)| -> Result<_, EngineError> {
5368                    let keys: Result<Vec<Value>, EngineError> = stmt
5369                        .order_by
5370                        .iter()
5371                        .map(|ob| {
5372                            eval::eval_expr(&ob.expr, r, &scan_ctx).map_err(EngineError::Eval)
5373                        })
5374                        .collect();
5375                    Ok((i, keys?))
5376                })
5377                .collect::<Result<_, _>>()?;
5378            indexed.sort_by(|a, b| {
5379                for (idx, (ka, kb)) in a.1.iter().zip(b.1.iter()).enumerate() {
5380                    let mut cmp = value_cmp(ka, kb);
5381                    if stmt.order_by[idx].desc {
5382                        cmp = cmp.reverse();
5383                    }
5384                    if cmp != core::cmp::Ordering::Equal {
5385                        return cmp;
5386                    }
5387                }
5388                core::cmp::Ordering::Equal
5389            });
5390            projected_rows = indexed
5391                .into_iter()
5392                .map(|(i, _)| projected_rows[i].clone())
5393                .collect();
5394        }
5395        // LIMIT / OFFSET — apply at the tail.
5396        if let Some(offset) = stmt.offset_literal() {
5397            let off = (offset as usize).min(projected_rows.len());
5398            projected_rows.drain(..off);
5399        }
5400        if let Some(limit) = stmt.limit_literal() {
5401            projected_rows.truncate(limit as usize);
5402        }
5403        Ok(QueryResult::Rows {
5404            columns,
5405            rows: projected_rows,
5406        })
5407    }
5408
5409    fn exec_bare_select_cancel(
5410        &self,
5411        stmt: &SelectStatement,
5412        cancel: CancelToken<'_>,
5413    ) -> Result<QueryResult, EngineError> {
5414        // v4.12: window-function path. When the projection contains
5415        // any `name(args) OVER (...)` we route to the dedicated
5416        // executor — partition + sort + per-row window value before
5417        // the regular projection.
5418        if select_has_window(stmt) {
5419            return self.exec_select_with_window(stmt, cancel);
5420        }
5421        // Constant SELECT (no FROM) — evaluate each item once against an
5422        // empty dummy row. Useful for `SELECT 1`, `SELECT coalesce(...)`,
5423        // `SELECT '7'::INT`. Column references will surface as
5424        // ColumnNotFound on eval since the schema is empty.
5425        let Some(from) = &stmt.from else {
5426            let empty_schema: Vec<ColumnSchema> = Vec::new();
5427            let ctx = self.ev_ctx(&empty_schema, None);
5428            let projection = build_projection(&stmt.items, &empty_schema, "")?;
5429            let dummy_row = Row::new(Vec::new());
5430            let mut values = Vec::with_capacity(projection.len());
5431            for p in &projection {
5432                values.push(eval::eval_expr(&p.expr, &dummy_row, &ctx)?);
5433            }
5434            let columns: Vec<ColumnSchema> = projection
5435                .into_iter()
5436                .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
5437                .collect();
5438            return Ok(QueryResult::Rows {
5439                columns,
5440                rows: alloc::vec![Row::new(values)],
5441            });
5442        };
5443        // Multi-table FROM (one or more joined peers) goes through the
5444        // nested-loop join executor. Single-table FROM stays on the
5445        // existing scan + index-seek path.
5446        if !from.joins.is_empty() {
5447            return self.exec_joined_select(stmt, from);
5448        }
5449        // v7.11.7 — `FROM unnest(<expr>) [AS] <alias>`. Synthesise a
5450        // single-column table at SELECT entry by evaluating the
5451        // expression once against the empty row (UNNEST is
5452        // uncorrelated in v7.11; correlated / LATERAL unnest is a
5453        // v7.12 carve-out). Build a virtual `Table` in a heap-only
5454        // catalog, then route to the regular scan path.
5455        if from.primary.unnest_expr.is_some() {
5456            return self.exec_select_unnest(stmt, &from.primary, cancel);
5457        }
5458        let primary = &from.primary;
5459        let table = self.active_catalog().get(&primary.name).ok_or_else(|| {
5460            StorageError::TableNotFound {
5461                name: primary.name.clone(),
5462            }
5463        })?;
5464        let schema_cols = &table.schema().columns;
5465        // The qualifier accepted on column refs is the alias (if any) else the
5466        // bare table name.
5467        let alias = primary.alias.as_deref().unwrap_or(primary.name.as_str());
5468        let ctx = self.ev_ctx(schema_cols, Some(alias));
5469
5470        // NSW kNN planner: `ORDER BY col <-> literal LIMIT k` with no
5471        // WHERE and an NSW index on `col` skips the full scan. The
5472        // walk returns rows already in ascending-distance order, so
5473        // ORDER BY / LIMIT are honoured implicitly.
5474        if let Some(nsw_rows) = try_nsw_knn(stmt, table, schema_cols, alias) {
5475            return materialise_in_order(stmt, table, schema_cols, alias, &nsw_rows);
5476        }
5477
5478        // Index seek: if WHERE is `col = literal` (or commuted) and the
5479        // referenced column has an index, dispatch each locator through
5480        // the catalog (hot tier → borrow, cold tier → page-read +
5481        // decode) and iterate just those rows. Otherwise fall back to a
5482        // full scan over the hot tier (cold-tier rows are only reached
5483        // via index seek in v5.1 — full table scans against cold-tier
5484        // data ship in v5.2 with the freezer's per-segment scan API).
5485        let indexed_rows: Option<Vec<Cow<'_, Row>>> = stmt.where_.as_ref().and_then(|w| {
5486            // BTree / col=literal seek first — covers the v7.11.3 multi-
5487            // column AND case and the leading-column equality lookup.
5488            try_index_seek(w, schema_cols, self.active_catalog(), table, alias)
5489                .or_else(|| {
5490                    // v7.12.3 — GIN-accelerated `WHERE col @@
5491                    // tsquery` when the column has a `USING gin`
5492                    // index. Returns an over-approximate candidate
5493                    // set; the WHERE re-eval loop below verifies
5494                    // the full `@@` predicate per row.
5495                    try_gin_seek(w, schema_cols, self.active_catalog(), table, alias, &ctx)
5496                })
5497                .or_else(|| {
5498                    // v7.15.0 — trigram-GIN-accelerated
5499                    // `WHERE col LIKE / ILIKE '<pat>'` when the
5500                    // column has a `gin_trgm_ops` GIN index.
5501                    // Over-approximate candidate set; the WHERE
5502                    // re-eval verifies the LIKE per row.
5503                    try_trgm_seek(w, schema_cols, table, alias)
5504                })
5505        });
5506
5507        // Aggregate path: filter rows first, then hand off to the
5508        // aggregate executor which does its own projection + ORDER BY.
5509        if aggregate::uses_aggregate(stmt) {
5510            let mut filtered: Vec<&Row> = Vec::new();
5511            // v6.2.6 — Memoize: per-query LRU cache for correlated
5512            // scalar subqueries. Fresh per row-loop entry so each
5513            // SELECT execution gets an isolated cache.
5514            let mut memo = memoize::MemoizeCache::new();
5515            if let Some(rows) = &indexed_rows {
5516                for cow in rows {
5517                    let row = cow.as_ref();
5518                    if let Some(where_expr) = &stmt.where_ {
5519                        let cond = self.eval_expr_with_correlated(
5520                            where_expr,
5521                            row,
5522                            &ctx,
5523                            cancel,
5524                            Some(&mut memo),
5525                        )?;
5526                        if !matches!(cond, Value::Bool(true)) {
5527                            continue;
5528                        }
5529                    }
5530                    filtered.push(row);
5531                }
5532            } else {
5533                for i in 0..table.row_count() {
5534                    let row = &table.rows()[i];
5535                    if let Some(where_expr) = &stmt.where_ {
5536                        let cond = self.eval_expr_with_correlated(
5537                            where_expr,
5538                            row,
5539                            &ctx,
5540                            cancel,
5541                            Some(&mut memo),
5542                        )?;
5543                        if !matches!(cond, Value::Bool(true)) {
5544                            continue;
5545                        }
5546                    }
5547                    filtered.push(row);
5548                }
5549            }
5550            let mut agg = aggregate::run(stmt, &filtered, schema_cols, Some(alias))?;
5551            apply_offset_and_limit(&mut agg.rows, stmt.offset_literal(), stmt.limit_literal());
5552            return Ok(QueryResult::Rows {
5553                columns: agg.columns,
5554                rows: agg.rows,
5555            });
5556        }
5557
5558        let projection = build_projection(&stmt.items, schema_cols, alias)?;
5559
5560        // Materialise the filter pass into `(order_key, projected_row)`
5561        // tuples. The order key is `None` when there's no ORDER BY clause.
5562        let mut tagged: Vec<(Vec<f64>, Row)> = Vec::new();
5563        // v6.2.6 — Memoize per-row WHERE eval shares one cache.
5564        let mut memo = memoize::MemoizeCache::new();
5565        // Inline the per-row work in a closure so the indexed and full-
5566        // scan branches share the body.
5567        let mut process_row = |row: &Row, loop_idx: usize| -> Result<(), EngineError> {
5568            if loop_idx.is_multiple_of(256) {
5569                cancel.check()?;
5570            }
5571            if let Some(where_expr) = &stmt.where_ {
5572                let cond =
5573                    self.eval_expr_with_correlated(where_expr, row, &ctx, cancel, Some(&mut memo))?;
5574                if !matches!(cond, Value::Bool(true)) {
5575                    return Ok(());
5576                }
5577            }
5578            let mut values = Vec::with_capacity(projection.len());
5579            for p in &projection {
5580                values.push(eval::eval_expr(&p.expr, row, &ctx)?);
5581            }
5582            let order_keys = if stmt.order_by.is_empty() {
5583                Vec::new()
5584            } else {
5585                build_order_keys(&stmt.order_by, row, &ctx)?
5586            };
5587            tagged.push((order_keys, Row::new(values)));
5588            Ok(())
5589        };
5590        if let Some(rows) = &indexed_rows {
5591            for (loop_idx, cow) in rows.iter().enumerate() {
5592                process_row(cow.as_ref(), loop_idx)?;
5593            }
5594        } else {
5595            for i in 0..table.row_count() {
5596                process_row(&table.rows()[i], i)?;
5597            }
5598        }
5599
5600        if !stmt.order_by.is_empty() {
5601            // Partial-sort fast path: when LIMIT is small relative to
5602            // the row count, select_nth_unstable + sort just the
5603            // prefix is O(n + k log k) instead of O(n log n). DISTINCT
5604            // requires the full sort because de-dup happens after.
5605            let keep = if stmt.distinct {
5606                None
5607            } else {
5608                stmt.limit_literal()
5609                    .map(|l| l as usize + stmt.offset_literal().map_or(0, |o| o as usize))
5610            };
5611            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
5612            partial_sort_tagged(&mut tagged, keep, &descs);
5613        }
5614
5615        let mut output_rows: Vec<Row> = tagged.into_iter().map(|(_, r)| r).collect();
5616        if stmt.distinct {
5617            output_rows = dedup_rows(output_rows);
5618        }
5619        apply_offset_and_limit(
5620            &mut output_rows,
5621            stmt.offset_literal(),
5622            stmt.limit_literal(),
5623        );
5624
5625        let columns: Vec<ColumnSchema> = projection
5626            .into_iter()
5627            .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
5628            .collect();
5629
5630        Ok(QueryResult::Rows {
5631            columns,
5632            rows: output_rows,
5633        })
5634    }
5635
5636    /// Multi-table SELECT executor (one or more JOIN peers).
5637    ///
5638    /// v1.10 builds the joined row set up-front via nested-loop joins,
5639    /// then runs WHERE + projection + ORDER BY against the combined
5640    /// rows. No index seek. Aggregates and DISTINCT still work because
5641    /// the executor delegates projection through the same shared paths.
5642    #[allow(clippy::too_many_lines)]
5643    /// v7.13.2 — mailrs round-6 S5. Resolve a TableRef into an
5644    /// owned (rows, schema) pair. Catalog tables clone their hot
5645    /// rows + schema; UNNEST table refs evaluate their array
5646    /// expression once and synthesise a single-column row set
5647    /// using the same dispatch as `exec_select_unnest`. Used by
5648    /// the joined-select path so UNNEST can appear in any FROM
5649    /// position, not just as the primary.
5650    fn materialise_table_ref(
5651        &self,
5652        tref: &TableRef,
5653    ) -> Result<(Vec<Row>, Vec<ColumnSchema>), EngineError> {
5654        if let Some(expr) = tref.unnest_expr.as_deref() {
5655            let empty_schema: Vec<ColumnSchema> = Vec::new();
5656            let ctx = EvalContext::new(&empty_schema, None);
5657            let dummy_row = Row::new(Vec::new());
5658            let (elem_dtype, rows) =
5659                match eval::eval_expr(expr, &dummy_row, &ctx).map_err(EngineError::Eval)? {
5660                    Value::Null => (DataType::Text, Vec::new()),
5661                    Value::TextArray(items) => (
5662                        DataType::Text,
5663                        items
5664                            .into_iter()
5665                            .map(|item| {
5666                                Row::new(alloc::vec![match item {
5667                                    Some(s) => Value::Text(s),
5668                                    None => Value::Null,
5669                                }])
5670                            })
5671                            .collect(),
5672                    ),
5673                    Value::IntArray(items) => (
5674                        DataType::Int,
5675                        items
5676                            .into_iter()
5677                            .map(|item| {
5678                                Row::new(alloc::vec![match item {
5679                                    Some(n) => Value::Int(n),
5680                                    None => Value::Null,
5681                                }])
5682                            })
5683                            .collect(),
5684                    ),
5685                    Value::BigIntArray(items) => (
5686                        DataType::BigInt,
5687                        items
5688                            .into_iter()
5689                            .map(|item| {
5690                                Row::new(alloc::vec![match item {
5691                                    Some(n) => Value::BigInt(n),
5692                                    None => Value::Null,
5693                                }])
5694                            })
5695                            .collect(),
5696                    ),
5697                    other => {
5698                        return Err(EngineError::Unsupported(alloc::format!(
5699                            "unnest() expects an array argument, got {:?}",
5700                            other.data_type()
5701                        )));
5702                    }
5703                };
5704            let alias = tref.alias.clone().unwrap_or_else(|| "unnest".to_string());
5705            let col_name = tref
5706                .unnest_column_aliases
5707                .first()
5708                .cloned()
5709                .unwrap_or(alias);
5710            return Ok((rows, alloc::vec![ColumnSchema::new(col_name, elem_dtype, true)]));
5711        }
5712        let table = self
5713            .active_catalog()
5714            .get(&tref.name)
5715            .ok_or_else(|| StorageError::TableNotFound {
5716                name: tref.name.clone(),
5717            })?;
5718        let rows: Vec<Row> = table.rows().iter().cloned().collect();
5719        let cols = table.schema().columns.clone();
5720        Ok((rows, cols))
5721    }
5722
5723    fn exec_joined_select(
5724        &self,
5725        stmt: &SelectStatement,
5726        from: &FromClause,
5727    ) -> Result<QueryResult, EngineError> {
5728        // v7.13.2 — mailrs round-6 S5. UNNEST peers materialise
5729        // into virtual (rows, schema) sources alongside catalog
5730        // tables, so `FROM t, UNNEST(arr) AS p(col)` works in
5731        // any join-list position. The lookup helper handles both
5732        // shapes uniformly.
5733        let (primary_rows, primary_cols) = self.materialise_table_ref(&from.primary)?;
5734        let primary_alias = from
5735            .primary
5736            .alias
5737            .as_deref()
5738            .unwrap_or(from.primary.name.as_str())
5739            .to_string();
5740        // Owned (rows, schema) per peer — borrows from the catalog
5741        // would not survive UNNEST-side materialisation.
5742        let mut joined: Vec<(Vec<Row>, Vec<ColumnSchema>, String, JoinKind, Option<&Expr>)> =
5743            Vec::new();
5744        for j in &from.joins {
5745            let (rows, cols) = self.materialise_table_ref(&j.table)?;
5746            let a = j
5747                .table
5748                .alias
5749                .as_deref()
5750                .unwrap_or(j.table.name.as_str())
5751                .to_string();
5752            joined.push((rows, cols, a, j.kind, j.on.as_ref()));
5753        }
5754
5755        // Build the combined schema: composite "alias.col" names so the
5756        // qualified-column resolver can find anything by exact match.
5757        let mut combined_schema: Vec<ColumnSchema> = Vec::new();
5758        for col in &primary_cols {
5759            combined_schema.push(ColumnSchema::new(
5760                alloc::format!("{primary_alias}.{}", col.name),
5761                col.ty,
5762                col.nullable,
5763            ));
5764        }
5765        for (_, cols, a, _, _) in &joined {
5766            for col in cols {
5767                combined_schema.push(ColumnSchema::new(
5768                    alloc::format!("{a}.{}", col.name),
5769                    col.ty,
5770                    col.nullable,
5771                ));
5772            }
5773        }
5774        let ctx = EvalContext::new(&combined_schema, None);
5775
5776        // Nested-loop join.
5777        let mut working: Vec<Row> = primary_rows;
5778        let mut produced_len = primary_cols.len();
5779        for (rrows, rcols, _, kind, on) in &joined {
5780            let right_arity = rcols.len();
5781            let mut next: Vec<Row> = Vec::new();
5782            for left in &working {
5783                let mut left_matched = false;
5784                for right in rrows {
5785                    let mut combined_vals = left.values.clone();
5786                    combined_vals.extend(right.values.iter().cloned());
5787                    // Pad combined to the eventual full width so the
5788                    // partial schema still matches positions used by ON.
5789                    let combined = Row::new(combined_vals);
5790                    let keep = if let Some(on_expr) = on {
5791                        let cond = eval::eval_expr(on_expr, &combined, &ctx)?;
5792                        matches!(cond, Value::Bool(true))
5793                    } else {
5794                        // CROSS / comma-list: every pair survives.
5795                        true
5796                    };
5797                    if keep {
5798                        next.push(combined);
5799                        left_matched = true;
5800                    }
5801                }
5802                if !left_matched && matches!(kind, JoinKind::Left) {
5803                    // LEFT OUTER JOIN: emit the left row with NULLs on
5804                    // the right side when no peer matched.
5805                    let mut combined_vals = left.values.clone();
5806                    for _ in 0..right_arity {
5807                        combined_vals.push(Value::Null);
5808                    }
5809                    next.push(Row::new(combined_vals));
5810                }
5811            }
5812            working = next;
5813            produced_len += right_arity;
5814            debug_assert!(produced_len <= combined_schema.len());
5815        }
5816
5817        // WHERE filter against combined rows.
5818        let mut filtered: Vec<Row> = Vec::new();
5819        for row in working {
5820            if let Some(where_expr) = &stmt.where_ {
5821                let cond = eval::eval_expr(where_expr, &row, &ctx)?;
5822                if !matches!(cond, Value::Bool(true)) {
5823                    continue;
5824                }
5825            }
5826            filtered.push(row);
5827        }
5828
5829        // Aggregate path: handle GROUP BY / aggregate calls over the
5830        // joined+filtered rows.
5831        if aggregate::uses_aggregate(stmt) {
5832            let refs: Vec<&Row> = filtered.iter().collect();
5833            let mut agg = aggregate::run(stmt, &refs, &combined_schema, None)?;
5834            apply_offset_and_limit(&mut agg.rows, stmt.offset_literal(), stmt.limit_literal());
5835            return Ok(QueryResult::Rows {
5836                columns: agg.columns,
5837                rows: agg.rows,
5838            });
5839        }
5840
5841        let projection = build_projection(&stmt.items, &combined_schema, "")?;
5842        let mut tagged: Vec<(Vec<f64>, Row)> = Vec::new();
5843        for row in &filtered {
5844            let mut values = Vec::with_capacity(projection.len());
5845            for p in &projection {
5846                values.push(eval::eval_expr(&p.expr, row, &ctx)?);
5847            }
5848            let order_keys = if stmt.order_by.is_empty() {
5849                Vec::new()
5850            } else {
5851                build_order_keys(&stmt.order_by, row, &ctx)?
5852            };
5853            tagged.push((order_keys, Row::new(values)));
5854        }
5855        if !stmt.order_by.is_empty() {
5856            let keep = if stmt.distinct {
5857                None
5858            } else {
5859                stmt.limit_literal()
5860                    .map(|l| l as usize + stmt.offset_literal().map_or(0, |o| o as usize))
5861            };
5862            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
5863            partial_sort_tagged(&mut tagged, keep, &descs);
5864        }
5865        let mut output_rows: Vec<Row> = tagged.into_iter().map(|(_, r)| r).collect();
5866        if stmt.distinct {
5867            output_rows = dedup_rows(output_rows);
5868        }
5869        apply_offset_and_limit(
5870            &mut output_rows,
5871            stmt.offset_literal(),
5872            stmt.limit_literal(),
5873        );
5874        let columns: Vec<ColumnSchema> = projection
5875            .into_iter()
5876            .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
5877            .collect();
5878        Ok(QueryResult::Rows {
5879            columns,
5880            rows: output_rows,
5881        })
5882    }
5883}
5884
5885/// One row-producing projection: an expression to evaluate, the resulting
5886/// column's user-visible name, its inferred type, and nullability.
5887#[derive(Debug, Clone)]
5888struct ProjectedItem {
5889    expr: Expr,
5890    output_name: String,
5891    ty: DataType,
5892    nullable: bool,
5893}
5894
5895/// Dedupe a row set, preserving first-seen order. `Row`'s `PartialEq` is
5896/// structural (`Vec<Value>` ⇒ pairwise `Value` equality), which gives SQL
5897/// `NULL = NULL → TRUE` and `NaN = NaN → FALSE`. The first agrees with
5898/// the spec's "two NULLs are not distinct"; the second is a tolerated
5899/// quirk for v1 (no NaN literals are reachable from the SQL surface).
5900fn dedup_rows(rows: Vec<Row>) -> Vec<Row> {
5901    let mut out: Vec<Row> = Vec::with_capacity(rows.len());
5902    for r in rows {
5903        if !out.iter().any(|seen| seen == &r) {
5904            out.push(r);
5905        }
5906    }
5907    out
5908}
5909
5910/// Coerce a `Value` to an `f64` sort key for ORDER BY. Numbers map directly;
5911/// NULL sorts last (treated as `+∞`); booleans are 0.0 / 1.0; text uses lex
5912/// order via the byte values; vectors are not sortable.
5913fn value_to_order_key(v: &Value) -> Result<f64, EngineError> {
5914    match v {
5915        Value::Null => Ok(f64::INFINITY),
5916        Value::SmallInt(n) => Ok(f64::from(*n)),
5917        Value::Int(n) => Ok(f64::from(*n)),
5918        Value::Date(d) => Ok(f64::from(*d)),
5919        #[allow(clippy::cast_precision_loss)]
5920        Value::Timestamp(t) => Ok(*t as f64),
5921        #[allow(clippy::cast_precision_loss)]
5922        Value::Numeric { scaled, scale } => {
5923            // Scaled integer / 10^scale, computed via f64 for sort
5924            // ordering only. Precision losses here only matter for
5925            // ORDER BY tie-breaks well past 15 significant digits.
5926            // `f64::powi` lives in std; we hand-roll the loop so the
5927            // no_std engine crate doesn't need it.
5928            let mut divisor = 1.0_f64;
5929            for _ in 0..*scale {
5930                divisor *= 10.0;
5931            }
5932            Ok((*scaled as f64) / divisor)
5933        }
5934        #[allow(clippy::cast_precision_loss)]
5935        Value::BigInt(n) => Ok(*n as f64),
5936        Value::Float(x) => Ok(*x),
5937        Value::Bool(b) => Ok(if *b { 1.0 } else { 0.0 }),
5938        Value::Text(s) => {
5939            // Lex order by codepoints — good enough for ORDER BY name.
5940            // Map first 8 bytes packed into u64 as a coarse key; ties fall to
5941            // partial_cmp Equal. v1.x can swap in a real string comparator.
5942            let mut key: u64 = 0;
5943            for &b in s.as_bytes().iter().take(8) {
5944                key = (key << 8) | u64::from(b);
5945            }
5946            #[allow(clippy::cast_precision_loss)]
5947            Ok(key as f64)
5948        }
5949        Value::Vector(_) | Value::Sq8Vector(_) | Value::HalfVector(_) => {
5950            Err(EngineError::Unsupported(
5951                "ORDER BY of a raw vector column is not meaningful — use `<->`".into(),
5952            ))
5953        }
5954        Value::Interval { .. } => Err(EngineError::Unsupported(
5955            "ORDER BY of an INTERVAL is not supported in v2.11 \
5956             (months vs micros has no single canonical ordering)"
5957                .into(),
5958        )),
5959        Value::Json(_) => Err(EngineError::Unsupported(
5960            "ORDER BY of a JSON value is not supported — cast the document to text first".into(),
5961        )),
5962        // v7.5.0 — Value is #[non_exhaustive]; future variants need
5963        // an explicit ORDER BY mapping. Surface as Unsupported until
5964        // engine support is added.
5965        _ => Err(EngineError::Unsupported(
5966            "ORDER BY of this value type is not supported".into(),
5967        )),
5968    }
5969}
5970
5971/// Try to plan a WHERE clause as an equality lookup against an existing
5972/// index. Returns the candidate row indices on success; `None` means the
5973/// caller should fall back to a full scan.
5974///
5975/// v0.8 recognises a single top-level `col = literal` (in either operand
5976/// order). AND chains and range scans land in later milestones.
5977/// Look for `ORDER BY col <dist-op> literal LIMIT k` against an
5978/// NSW-indexed vector column. Recognised distance ops: `<->` (L2),
5979/// `<#>` (inner product), `<=>` (cosine). When a WHERE clause is
5980/// present, the planner does an "over-fetch and filter" pass — it
5981/// asks the graph for `k * over_fetch` candidates, evaluates WHERE
5982/// against each, and trims back to `k`. Returns the row indices in
5983/// ascending-distance order when the plan applies.
5984fn try_nsw_knn(
5985    stmt: &SelectStatement,
5986    table: &Table,
5987    schema_cols: &[ColumnSchema],
5988    table_alias: &str,
5989) -> Option<Vec<usize>> {
5990    if stmt.distinct {
5991        return None;
5992    }
5993    let limit = usize::try_from(stmt.limit_literal()?).ok()?;
5994    if limit == 0 {
5995        return None;
5996    }
5997    // v6.4.0 — NSW kNN dispatch needs a single ORDER BY key on the
5998    // distance metric. Multi-key ORDER BY falls through to the
5999    // generic sort path.
6000    if stmt.order_by.len() != 1 {
6001        return None;
6002    }
6003    let order = &stmt.order_by[0];
6004    // NSW kNN returns rows ascending by distance — DESC inverts the
6005    // natural order, so the planner can't handle it without a sort
6006    // pass. Fall back to the generic ORDER BY path.
6007    if order.desc {
6008        return None;
6009    }
6010    let Expr::Binary { lhs, op, rhs } = &order.expr else {
6011        return None;
6012    };
6013    let metric = match op {
6014        BinOp::L2Distance => spg_storage::NswMetric::L2,
6015        BinOp::InnerProduct => spg_storage::NswMetric::InnerProduct,
6016        BinOp::CosineDistance => spg_storage::NswMetric::Cosine,
6017        _ => return None,
6018    };
6019    // Accept both `col <op> literal` and `literal <op> col`.
6020    let ((Expr::Column(col), literal) | (literal, Expr::Column(col))) =
6021        (lhs.as_ref(), rhs.as_ref())
6022    else {
6023        return None;
6024    };
6025    if let Some(q) = &col.qualifier
6026        && q != table_alias
6027    {
6028        return None;
6029    }
6030    let col_pos = schema_cols.iter().position(|s| s.name == col.name)?;
6031    let query = literal_to_vector(literal)?;
6032    let idx = spg_storage::nsw_index_on(table, col_pos)?;
6033    if let Some(where_expr) = &stmt.where_ {
6034        // Over-fetch and filter. The factor (10×) is a heuristic that
6035        // covers typical selectivity for the corpus tests; v2.x will
6036        // make it configurable.
6037        let over_fetch = limit.saturating_mul(10).max(NSW_OVER_FETCH_FLOOR);
6038        let candidates = spg_storage::nsw_query(table, &idx.name, &query, over_fetch, metric);
6039        let ctx = EvalContext::new(schema_cols, Some(table_alias));
6040        let mut kept: Vec<usize> = Vec::with_capacity(limit);
6041        for i in candidates {
6042            let row = &table.rows()[i];
6043            let cond = eval::eval_expr(where_expr, row, &ctx).ok()?;
6044            if matches!(cond, Value::Bool(true)) {
6045                kept.push(i);
6046                if kept.len() >= limit {
6047                    break;
6048                }
6049            }
6050        }
6051        Some(kept)
6052    } else {
6053        Some(spg_storage::nsw_query(
6054            table, &idx.name, &query, limit, metric,
6055        ))
6056    }
6057}
6058
6059/// Lower bound on the over-fetch pool when WHERE is present — even
6060/// for tiny `LIMIT 1` queries we keep enough candidates to absorb a
6061/// few WHERE rejections.
6062const NSW_OVER_FETCH_FLOOR: usize = 32;
6063
6064/// Pull a `Vec<f32>` out of a literal-or-cast expression. Returns
6065/// `None` for anything we can't fold at plan time.
6066fn literal_to_vector(e: &Expr) -> Option<Vec<f32>> {
6067    match e {
6068        Expr::Literal(Literal::Vector(v)) => Some(v.clone()),
6069        Expr::Cast { expr, .. } => literal_to_vector(expr),
6070        _ => None,
6071    }
6072}
6073
6074/// Materialise rows in a planner-supplied order (used by the NSW path)
6075/// without re-running ORDER BY. The projection + LIMIT slot mirror the
6076/// equivalent block in `exec_bare_select`.
6077fn materialise_in_order(
6078    stmt: &SelectStatement,
6079    table: &Table,
6080    schema_cols: &[ColumnSchema],
6081    table_alias: &str,
6082    ordered_rows: &[usize],
6083) -> Result<QueryResult, EngineError> {
6084    let ctx = EvalContext::new(schema_cols, Some(table_alias));
6085    let projection = build_projection(&stmt.items, schema_cols, table_alias)?;
6086    let mut output_rows: Vec<Row> = Vec::with_capacity(ordered_rows.len());
6087    for &i in ordered_rows {
6088        let row = &table.rows()[i];
6089        let mut values = Vec::with_capacity(projection.len());
6090        for p in &projection {
6091            values.push(eval::eval_expr(&p.expr, row, &ctx)?);
6092        }
6093        output_rows.push(Row::new(values));
6094    }
6095    apply_offset_and_limit(
6096        &mut output_rows,
6097        stmt.offset_literal(),
6098        stmt.limit_literal(),
6099    );
6100    let columns: Vec<ColumnSchema> = projection
6101        .into_iter()
6102        .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
6103        .collect();
6104    Ok(QueryResult::Rows {
6105        columns,
6106        rows: output_rows,
6107    })
6108}
6109
6110fn try_index_seek<'a>(
6111    where_expr: &Expr,
6112    schema_cols: &[ColumnSchema],
6113    catalog: &'a Catalog,
6114    table: &'a Table,
6115    table_alias: &str,
6116) -> Option<Vec<Cow<'a, Row>>> {
6117    // v7.11.3 — recurse through top-level `AND` so a PG-style
6118    // composite predicate like `WHERE id = 1 AND created_at > $1`
6119    // still hits the index on `id`. The caller re-applies the
6120    // full WHERE expression to each returned row, so dropping the
6121    // residual conjuncts here is correct — the index just narrows
6122    // the candidate set.
6123    if let Expr::Binary {
6124        lhs,
6125        op: BinOp::And,
6126        rhs,
6127    } = where_expr
6128    {
6129        // Try LHS first (typical convention: leading equality on
6130        // the indexed column comes first in user-written SQL).
6131        if let Some(rows) = try_index_seek(lhs, schema_cols, catalog, table, table_alias) {
6132            return Some(rows);
6133        }
6134        return try_index_seek(rhs, schema_cols, catalog, table, table_alias);
6135    }
6136    let Expr::Binary {
6137        lhs,
6138        op: BinOp::Eq,
6139        rhs,
6140    } = where_expr
6141    else {
6142        return None;
6143    };
6144    let (col_pos, value) = resolve_col_literal_pair(lhs, rhs, schema_cols, table_alias)
6145        .or_else(|| resolve_col_literal_pair(rhs, lhs, schema_cols, table_alias))?;
6146    let idx = table.index_on(col_pos)?;
6147    let key = IndexKey::from_value(&value)?;
6148    let locators = idx.lookup_eq(&key);
6149    let table_name = table.schema().name.as_str();
6150    // v5.1: each locator dispatches to either the hot tier (zero-
6151    // copy borrow of `table.rows()[i]`) or a cold-tier segment
6152    // (one page read + dense row decode, ~µs scale). Cold rows are
6153    // returned as `Cow::Owned` so the caller's `&Row` iteration
6154    // doesn't see a tier distinction; pre-freezer (no cold
6155    // segments loaded) every locator is `Hot` and every entry is
6156    // `Cow::Borrowed` — identical cost to the pre-v5.1 path.
6157    let mut out: Vec<Cow<'a, Row>> = Vec::with_capacity(locators.len());
6158    for loc in locators {
6159        match *loc {
6160            spg_storage::RowLocator::Hot(i) => {
6161                if let Some(row) = table.rows().get(i) {
6162                    out.push(Cow::Borrowed(row));
6163                }
6164            }
6165            spg_storage::RowLocator::Cold { segment_id, .. } => {
6166                if let Some(row) = catalog.resolve_cold_locator(table_name, segment_id, &key) {
6167                    out.push(Cow::Owned(row));
6168                }
6169            }
6170        }
6171    }
6172    Some(out)
6173}
6174
6175/// v7.12.3 — GIN-accelerated candidate seek for `WHERE col @@ <ts_query>`.
6176///
6177/// Recurses through top-level `AND` like [`try_index_seek`] so a
6178/// composite predicate `WHERE search_vector @@ q AND id > $1` still
6179/// hits the GIN index on `search_vector` — the caller re-applies the
6180/// full WHERE expression to each returned candidate, so dropping the
6181/// `id > $1` residual here stays semantically correct.
6182///
6183/// Returns `None` when:
6184///   - no leaf is a `col @@ <rhs>` shape on a GIN-indexed column;
6185///   - the RHS can't be const-evaluated to a `Value::TsQuery`
6186///     (typically because it references row columns);
6187///   - the resolved `TsQuery` uses query shapes the MVP doesn't
6188///     accelerate (`Not`, `Phrase` — those fall through to full scan).
6189///
6190/// On `Some(rows)` the caller iterates only `rows` and re-evaluates
6191/// the full `@@` predicate per row, so an over-approximate candidate
6192/// set is safe.
6193fn try_gin_seek<'a>(
6194    where_expr: &Expr,
6195    schema_cols: &[ColumnSchema],
6196    catalog: &'a Catalog,
6197    table: &'a Table,
6198    table_alias: &str,
6199    ctx: &eval::EvalContext<'_>,
6200) -> Option<Vec<Cow<'a, Row>>> {
6201    if let Expr::Binary {
6202        lhs,
6203        op: BinOp::And,
6204        rhs,
6205    } = where_expr
6206    {
6207        if let Some(rows) = try_gin_seek(lhs, schema_cols, catalog, table, table_alias, ctx) {
6208            return Some(rows);
6209        }
6210        return try_gin_seek(rhs, schema_cols, catalog, table, table_alias, ctx);
6211    }
6212    let Expr::Binary {
6213        lhs,
6214        op: BinOp::TsMatch,
6215        rhs,
6216    } = where_expr
6217    else {
6218        return None;
6219    };
6220    // Either side can be the column; pgvector idiom (`vec @@ q`)
6221    // hits the first arm, FROM-clause-derived (`plainto_tsquery($1)
6222    // q ... WHERE search_vector @@ q`) the same. CROSS JOIN derived
6223    // tables resolve `q` to a Column too.
6224    let (col_pos, query) = resolve_gin_col_query(lhs, rhs, schema_cols, table_alias, ctx)
6225        .or_else(|| resolve_gin_col_query(rhs, lhs, schema_cols, table_alias, ctx))?;
6226    let idx = table
6227        .indices()
6228        .iter()
6229        .find(|i| i.column_position == col_pos && i.is_gin())?;
6230    let candidates = gin_query_candidates(idx, &query)?;
6231    let _ = catalog; // cold-tier row resolution unused in MVP; see below.
6232    let mut out: Vec<Cow<'a, Row>> = Vec::with_capacity(candidates.len());
6233    for loc in candidates {
6234        match loc {
6235            spg_storage::RowLocator::Hot(i) => {
6236                if let Some(row) = table.rows().get(i) {
6237                    out.push(Cow::Borrowed(row));
6238                }
6239            }
6240            // GIN cold-tier rows in the MVP: skipped, matching the
6241            // full-scan `@@` path which itself only iterates
6242            // `table.rows()` (hot tier). When v7.13+ adds cold-tier
6243            // scan-time materialisation for `@@`, the parallel
6244            // resolution lands here; until then both paths see the
6245            // same hot-only candidate set so correctness is preserved.
6246            spg_storage::RowLocator::Cold { .. } => {}
6247        }
6248    }
6249    Some(out)
6250}
6251
6252/// v7.15.0 — trigram-GIN-accelerated candidate seek for
6253/// `WHERE col LIKE '<pat>'` and `WHERE col ILIKE '<pat>'` when
6254/// the column has a `gin_trgm_ops` GIN index.
6255///
6256/// Walks top-level `AND` so multi-predicate WHEREs (`col LIKE
6257/// 'foo%' AND id > 1`) still hit the trigram index; the caller
6258/// re-evaluates the full WHERE per candidate row, so dropping
6259/// non-LIKE conjuncts here stays semantically correct.
6260///
6261/// Returns `None` when:
6262///   - no leaf is `col LIKE/ILIKE <literal>` on a trigram-GIN-
6263///     indexed column;
6264///   - the pattern's literal runs are too short to constrain
6265///     (pattern decomposes into `< 3`-char runs, e.g. `%ab%`);
6266///   - the pattern doesn't const-evaluate to a TEXT.
6267fn try_trgm_seek<'a>(
6268    where_expr: &Expr,
6269    schema_cols: &[ColumnSchema],
6270    table: &'a Table,
6271    table_alias: &str,
6272) -> Option<Vec<Cow<'a, Row>>> {
6273    if let Expr::Binary {
6274        lhs,
6275        op: BinOp::And,
6276        rhs,
6277    } = where_expr
6278    {
6279        if let Some(rows) = try_trgm_seek(lhs, schema_cols, table, table_alias) {
6280            return Some(rows);
6281        }
6282        return try_trgm_seek(rhs, schema_cols, table, table_alias);
6283    }
6284    // LIKE node is what carries the column reference + pattern.
6285    // ILIKE is the same AST node — PG's LIKE/ILIKE both lower
6286    // through `Expr::Like { expr, pattern, negated }`. The trigram
6287    // index posting-list keys are already lower-cased and
6288    // case-folded, so we only need the pattern's literal runs.
6289    let Expr::Like {
6290        expr, pattern, ..
6291    } = where_expr
6292    else {
6293        return None;
6294    };
6295    // Column side.
6296    let Expr::Column(c) = expr.as_ref() else {
6297        return None;
6298    };
6299    if let Some(q) = &c.qualifier
6300        && q != table_alias
6301    {
6302        return None;
6303    }
6304    let col_pos = schema_cols
6305        .iter()
6306        .position(|s| s.name.eq_ignore_ascii_case(&c.name))?;
6307    // Index must exist on that column AND be a trigram-GIN.
6308    let idx = table
6309        .indices()
6310        .iter()
6311        .find(|i| i.column_position == col_pos && i.is_gin_trgm())?;
6312    // Pattern side must be a literal TEXT — anything else (column
6313    // ref, function call, parameter that hasn't been bound yet)
6314    // falls through to full scan.
6315    let Expr::Literal(spg_sql::ast::Literal::String(pat)) = pattern.as_ref() else {
6316        return None;
6317    };
6318    let trigrams = spg_storage::trgm::trigrams_from_like_pattern(pat)?;
6319    // Intersect every trigram's posting list. Empty intersection
6320    // → empty candidate set (caller short-circuits its row loop).
6321    let mut iter = trigrams.iter();
6322    let first = iter.next()?;
6323    let mut acc: Vec<spg_storage::RowLocator> = {
6324        let mut v = idx.gin_trgm_lookup(first).to_vec();
6325        v.sort_by_key(locator_sort_key);
6326        v.dedup_by_key(|l| locator_sort_key(l));
6327        v
6328    };
6329    for tri in iter {
6330        let mut next: Vec<spg_storage::RowLocator> = idx.gin_trgm_lookup(tri).to_vec();
6331        next.sort_by_key(locator_sort_key);
6332        next.dedup_by_key(|l| locator_sort_key(l));
6333        // Sorted-merge intersection.
6334        let mut merged: Vec<spg_storage::RowLocator> = Vec::with_capacity(acc.len().min(next.len()));
6335        let (mut i, mut j) = (0usize, 0usize);
6336        while i < acc.len() && j < next.len() {
6337            let lk = locator_sort_key(&acc[i]);
6338            let rk = locator_sort_key(&next[j]);
6339            match lk.cmp(&rk) {
6340                core::cmp::Ordering::Less => i += 1,
6341                core::cmp::Ordering::Greater => j += 1,
6342                core::cmp::Ordering::Equal => {
6343                    merged.push(acc[i]);
6344                    i += 1;
6345                    j += 1;
6346                }
6347            }
6348        }
6349        acc = merged;
6350        if acc.is_empty() {
6351            break;
6352        }
6353    }
6354    let mut out: Vec<Cow<'a, Row>> = Vec::with_capacity(acc.len());
6355    for loc in acc {
6356        if let spg_storage::RowLocator::Hot(i) = loc
6357            && let Some(row) = table.rows().get(i)
6358        {
6359            out.push(Cow::Borrowed(row));
6360        }
6361        // Cold-tier rows: skipped in MVP (same as try_gin_seek).
6362    }
6363    Some(out)
6364}
6365
6366/// v7.12.3 — extract `(column_position, TsQueryAst)` when one side of
6367/// the binary is a column reference to a GIN-indexed tsvector column
6368/// and the other side const-evaluates to a `Value::TsQuery`. Returns
6369/// `None` if the column reference is for the wrong table alias, or if
6370/// the RHS expression depends on row data.
6371fn resolve_gin_col_query(
6372    col_side: &Expr,
6373    query_side: &Expr,
6374    schema_cols: &[ColumnSchema],
6375    table_alias: &str,
6376    ctx: &eval::EvalContext<'_>,
6377) -> Option<(usize, spg_storage::TsQueryAst)> {
6378    let Expr::Column(c) = col_side else {
6379        return None;
6380    };
6381    if let Some(q) = &c.qualifier
6382        && q != table_alias
6383    {
6384        return None;
6385    }
6386    let pos = schema_cols.iter().position(|s| s.name == c.name)?;
6387    // Const-evaluate the query side with an empty row — fails fast
6388    // (with a `ColumnNotFound` / similar) if the expression actually
6389    // depends on row data, which is exactly the bail signal we want.
6390    let empty_row = Row::new(Vec::new());
6391    let v = eval::eval_expr(query_side, &empty_row, ctx).ok()?;
6392    let Value::TsQuery(q) = v else { return None };
6393    Some((pos, q))
6394}
6395
6396/// v7.12.3 — walk a `TsQueryAst` against an [`IndexKind::Gin`] index
6397/// to produce a candidate row-locator set. Returns `None` for query
6398/// shapes the MVP doesn't accelerate (`Not` / `Phrase` — both bail to
6399/// full scan since their semantics need either complementation across
6400/// the whole row set or positional verification beyond what the
6401/// posting list carries).
6402///
6403/// Candidate sets are over-approximate — the caller re-applies the
6404/// full `@@` predicate per row, so reporting "row was in some
6405/// posting list" without verifying positions / weights stays correct.
6406fn gin_query_candidates(
6407    idx: &spg_storage::Index,
6408    query: &spg_storage::TsQueryAst,
6409) -> Option<Vec<spg_storage::RowLocator>> {
6410    use spg_storage::TsQueryAst;
6411    match query {
6412        TsQueryAst::Term { word, .. } => {
6413            let mut v: Vec<spg_storage::RowLocator> = idx.gin_lookup_word(word).to_vec();
6414            v.sort_by_key(locator_sort_key);
6415            v.dedup_by_key(|l| locator_sort_key(l));
6416            Some(v)
6417        }
6418        TsQueryAst::And(l, r) => {
6419            let mut left = gin_query_candidates(idx, l)?;
6420            let mut right = gin_query_candidates(idx, r)?;
6421            left.sort_by_key(locator_sort_key);
6422            right.sort_by_key(locator_sort_key);
6423            // Sorted-merge intersection.
6424            let mut out: Vec<spg_storage::RowLocator> = Vec::new();
6425            let (mut i, mut j) = (0usize, 0usize);
6426            while i < left.len() && j < right.len() {
6427                let lk = locator_sort_key(&left[i]);
6428                let rk = locator_sort_key(&right[j]);
6429                match lk.cmp(&rk) {
6430                    core::cmp::Ordering::Less => i += 1,
6431                    core::cmp::Ordering::Greater => j += 1,
6432                    core::cmp::Ordering::Equal => {
6433                        out.push(left[i]);
6434                        i += 1;
6435                        j += 1;
6436                    }
6437                }
6438            }
6439            Some(out)
6440        }
6441        TsQueryAst::Or(l, r) => {
6442            let mut out = gin_query_candidates(idx, l)?;
6443            out.extend(gin_query_candidates(idx, r)?);
6444            out.sort_by_key(locator_sort_key);
6445            out.dedup_by_key(|l| locator_sort_key(l));
6446            Some(out)
6447        }
6448        // Not / Phrase bail to full scan in the MVP. Not needs
6449        // complementation against the whole row set (not represented
6450        // in the posting-list view); Phrase needs positional
6451        // verification beyond what `word → rows` carries.
6452        TsQueryAst::Not(_) | TsQueryAst::Phrase { .. } => None,
6453    }
6454}
6455
6456/// v7.12.3 — total ordering on `RowLocator` for sort/dedup purposes
6457/// inside the GIN intersection / union loops. Hot rows order by their
6458/// row index; Cold rows order after all Hot rows, then by
6459/// `(segment_id, the cold sub-key)`.
6460fn locator_sort_key(l: &spg_storage::RowLocator) -> (u8, u64, u64) {
6461    match *l {
6462        spg_storage::RowLocator::Hot(i) => (0, i as u64, 0),
6463        spg_storage::RowLocator::Cold {
6464            segment_id,
6465            page_offset,
6466        } => (1, u64::from(segment_id), u64::from(page_offset)),
6467    }
6468}
6469
6470/// v5.2.3: extract `(column_position, IndexKey)` when `where_expr`
6471/// is a simple `col = literal` predicate suitable for a `BTree` index
6472/// seek. Used by `exec_update_cancel` / `exec_delete_cancel` to
6473/// decide whether a write touches a cold-tier row (which requires
6474/// promote-on-write / shadow-on-delete) before falling through to
6475/// the hot-tier row walk.
6476///
6477/// Returns `None` for any predicate shape the planner can't push
6478/// down to an index seek — complex WHERE clauses always take the
6479/// hot-only path (cold rows are immutable to non-indexed writes
6480/// until a future scan-fanout sub-version).
6481fn try_pk_predicate(
6482    where_expr: &Expr,
6483    schema_cols: &[ColumnSchema],
6484    table_alias: &str,
6485) -> Option<(usize, IndexKey)> {
6486    let Expr::Binary {
6487        lhs,
6488        op: BinOp::Eq,
6489        rhs,
6490    } = where_expr
6491    else {
6492        return None;
6493    };
6494    let (col_pos, value) = resolve_col_literal_pair(lhs, rhs, schema_cols, table_alias)
6495        .or_else(|| resolve_col_literal_pair(rhs, lhs, schema_cols, table_alias))?;
6496    let key = IndexKey::from_value(&value)?;
6497    Some((col_pos, key))
6498}
6499
6500fn resolve_col_literal_pair(
6501    col_side: &Expr,
6502    lit_side: &Expr,
6503    schema_cols: &[ColumnSchema],
6504    table_alias: &str,
6505) -> Option<(usize, Value)> {
6506    let Expr::Column(c) = col_side else {
6507        return None;
6508    };
6509    if let Some(q) = &c.qualifier
6510        && q != table_alias
6511    {
6512        return None;
6513    }
6514    let pos = schema_cols.iter().position(|s| s.name == c.name)?;
6515    let Expr::Literal(l) = lit_side else {
6516        return None;
6517    };
6518    let v = match l {
6519        Literal::Integer(n) => {
6520            if let Ok(small) = i32::try_from(*n) {
6521                Value::Int(small)
6522            } else {
6523                Value::BigInt(*n)
6524            }
6525        }
6526        Literal::Float(x) => Value::Float(*x),
6527        Literal::String(s) => Value::Text(s.clone()),
6528        Literal::Bool(b) => Value::Bool(*b),
6529        Literal::Null => Value::Null,
6530        // Vector and Interval literals can't be used as B-tree index keys.
6531        // Tell the planner to fall back to full-scan.
6532        Literal::Vector(_) | Literal::Interval { .. } => return None,
6533    };
6534    Some((pos, v))
6535}
6536
6537/// Find the schema entry that a SELECT-list `Expr::Column` refers to.
6538/// Mirrors `resolve_column` in `eval.rs`, but returns a proper
6539/// `EngineError` so the projection-build path keeps `UnknownQualifier`
6540/// vs `ColumnNotFound` distinct.
6541fn resolve_projection_column<'a>(
6542    c: &ColumnName,
6543    schema_cols: &'a [ColumnSchema],
6544    table_alias: &str,
6545) -> Result<&'a ColumnSchema, EngineError> {
6546    if let Some(q) = &c.qualifier {
6547        let composite = alloc::format!("{q}.{name}", name = c.name);
6548        if let Some(s) = schema_cols.iter().find(|s| s.name == composite) {
6549            return Ok(s);
6550        }
6551        // Single-table case: the qualifier may equal the active alias —
6552        // then look for the bare column name.
6553        if q == table_alias
6554            && let Some(s) = schema_cols.iter().find(|s| s.name == c.name)
6555        {
6556            return Ok(s);
6557        }
6558        // For multi-table schemas the qualifier is unknown only if no
6559        // column bears the "<q>." prefix. For single-table, the alias
6560        // mismatch alone is enough.
6561        let prefix = alloc::format!("{q}.");
6562        let qualifier_known =
6563            q == table_alias || schema_cols.iter().any(|s| s.name.starts_with(&prefix));
6564        if !qualifier_known {
6565            return Err(EngineError::Eval(EvalError::UnknownQualifier {
6566                qualifier: q.clone(),
6567            }));
6568        }
6569        return Err(EngineError::Eval(EvalError::ColumnNotFound {
6570            name: c.name.clone(),
6571        }));
6572    }
6573    if let Some(s) = schema_cols.iter().find(|s| s.name == c.name) {
6574        return Ok(s);
6575    }
6576    let suffix = alloc::format!(".{name}", name = c.name);
6577    let mut matches = schema_cols.iter().filter(|s| s.name.ends_with(&suffix));
6578    let first = matches.next();
6579    let extra = matches.next();
6580    match (first, extra) {
6581        (Some(s), None) => Ok(s),
6582        (Some(_), Some(_)) => Err(EngineError::Eval(EvalError::TypeMismatch {
6583            detail: alloc::format!("ambiguous column reference: {}", c.name),
6584        })),
6585        _ => Err(EngineError::Eval(EvalError::ColumnNotFound {
6586            name: c.name.clone(),
6587        })),
6588    }
6589}
6590
6591fn build_projection(
6592    items: &[SelectItem],
6593    schema_cols: &[ColumnSchema],
6594    table_alias: &str,
6595) -> Result<Vec<ProjectedItem>, EngineError> {
6596    let mut out = Vec::new();
6597    for item in items {
6598        match item {
6599            SelectItem::Wildcard => {
6600                for col in schema_cols {
6601                    out.push(ProjectedItem {
6602                        expr: Expr::Column(ColumnName {
6603                            qualifier: None,
6604                            name: col.name.clone(),
6605                        }),
6606                        output_name: col.name.clone(),
6607                        ty: col.ty,
6608                        nullable: col.nullable,
6609                    });
6610                }
6611            }
6612            SelectItem::Expr { expr, alias } => {
6613                // Plain column ref keeps full schema info (real type +
6614                // nullability). Compound expressions evaluate fine but have
6615                // no static type — surface them as nullable TEXT, which is
6616                // what most clients render anyway.
6617                if let Expr::Column(c) = expr {
6618                    let sch = resolve_projection_column(c, schema_cols, table_alias)?;
6619                    let output_name = alias.clone().unwrap_or_else(|| c.name.clone());
6620                    out.push(ProjectedItem {
6621                        expr: expr.clone(),
6622                        output_name,
6623                        ty: sch.ty,
6624                        nullable: sch.nullable,
6625                    });
6626                } else {
6627                    let output_name = alias.clone().unwrap_or_else(|| expr.to_string());
6628                    out.push(ProjectedItem {
6629                        expr: expr.clone(),
6630                        output_name,
6631                        ty: DataType::Text,
6632                        nullable: true,
6633                    });
6634                }
6635            }
6636        }
6637    }
6638    Ok(out)
6639}
6640
6641/// Promote an integer to a NUMERIC value at the requested scale.
6642/// Rejects values that, after scaling, would overflow the column's
6643/// precision budget.
6644fn numeric_from_integer(
6645    n: i128,
6646    precision: u8,
6647    scale: u8,
6648    col_name: &str,
6649) -> Result<Value, EngineError> {
6650    let factor = pow10_i128(scale);
6651    let scaled = n.checked_mul(factor).ok_or_else(|| {
6652        EngineError::Unsupported(alloc::format!(
6653            "integer overflow scaling value for column `{col_name}` to scale {scale}"
6654        ))
6655    })?;
6656    check_precision(scaled, precision, col_name)?;
6657    Ok(Value::Numeric { scaled, scale })
6658}
6659
6660/// Float → NUMERIC. Uses round-half-away-from-zero on `x * 10^scale`,
6661/// then verifies the result fits the column's precision.
6662#[allow(clippy::cast_precision_loss, clippy::cast_possible_truncation)]
6663fn numeric_from_float(
6664    x: f64,
6665    precision: u8,
6666    scale: u8,
6667    col_name: &str,
6668) -> Result<Value, EngineError> {
6669    if !x.is_finite() {
6670        return Err(EngineError::Unsupported(alloc::format!(
6671            "cannot store non-finite float in NUMERIC column `{col_name}`"
6672        )));
6673    }
6674    let mut factor = 1.0_f64;
6675    for _ in 0..scale {
6676        factor *= 10.0;
6677    }
6678    // Round half-away-from-zero by biasing then casting (`as i128`
6679    // truncates toward zero, so the bias + truncation gives the
6680    // desired rounding). `f64::floor` / `ceil` live in std; we don't
6681    // need them — the cast handles the truncation step.
6682    let shifted = x * factor;
6683    let biased = if shifted >= 0.0 {
6684        shifted + 0.5
6685    } else {
6686        shifted - 0.5
6687    };
6688    // Range-check before casting back to i128 — the cast itself is
6689    // saturating in Rust, which would silently truncate huge inputs.
6690    if !(-1e38..=1e38).contains(&biased) {
6691        return Err(EngineError::Unsupported(alloc::format!(
6692            "value {x} overflows NUMERIC range for column `{col_name}`"
6693        )));
6694    }
6695    let scaled = biased as i128;
6696    check_precision(scaled, precision, col_name)?;
6697    Ok(Value::Numeric { scaled, scale })
6698}
6699
6700/// Move a Numeric value from `src_scale` to `dst_scale`. Going up
6701/// multiplies by 10; going down rounds half-away-from-zero.
6702fn numeric_rescale(
6703    scaled: i128,
6704    src_scale: u8,
6705    precision: u8,
6706    dst_scale: u8,
6707    col_name: &str,
6708) -> Result<Value, EngineError> {
6709    let new_scaled = if dst_scale >= src_scale {
6710        let bump = pow10_i128(dst_scale - src_scale);
6711        scaled.checked_mul(bump).ok_or_else(|| {
6712            EngineError::Unsupported(alloc::format!(
6713                "overflow rescaling NUMERIC for column `{col_name}`"
6714            ))
6715        })?
6716    } else {
6717        let drop = pow10_i128(src_scale - dst_scale);
6718        let half = drop / 2;
6719        if scaled >= 0 {
6720            (scaled + half) / drop
6721        } else {
6722            (scaled - half) / drop
6723        }
6724    };
6725    check_precision(new_scaled, precision, col_name)?;
6726    Ok(Value::Numeric {
6727        scaled: new_scaled,
6728        scale: dst_scale,
6729    })
6730}
6731
6732/// Drop the fractional part of a scaled integer, returning the integer
6733/// portion (toward zero). Used for NUMERIC → INT casts.
6734const fn numeric_truncate_to_integer(scaled: i128, scale: u8) -> i128 {
6735    if scale == 0 {
6736        return scaled;
6737    }
6738    let factor = pow10_i128_const(scale);
6739    scaled / factor
6740}
6741
6742/// Verify a scaled NUMERIC value fits the column's declared precision.
6743/// `precision == 0` is the "unconstrained" form (bare `NUMERIC`); we
6744/// skip the check there.
6745fn check_precision(scaled: i128, precision: u8, col_name: &str) -> Result<(), EngineError> {
6746    if precision == 0 {
6747        return Ok(());
6748    }
6749    let limit = pow10_i128(precision);
6750    if scaled.unsigned_abs() >= limit.unsigned_abs() {
6751        return Err(EngineError::Unsupported(alloc::format!(
6752            "NUMERIC value exceeds precision {precision} for column `{col_name}`"
6753        )));
6754    }
6755    Ok(())
6756}
6757
/// `10^p` as an `i128`, usable in const contexts.
///
/// The result is exact for `p <= 38`. `10^39` no longer fits in `i128`,
/// so for larger exponents the result saturates at `i128::MAX` instead of
/// overflowing (which would panic in debug builds and wrap to an
/// unrelated — possibly zero — value in release builds).
const fn pow10_i128_const(p: u8) -> i128 {
    let mut acc: i128 = 1;
    let mut i = 0;
    while i < p {
        acc = acc.saturating_mul(10);
        i += 1;
    }
    acc
}

/// Non-const convenience wrapper around [`pow10_i128_const`]; same
/// saturating behaviour for `p >= 39`.
fn pow10_i128(p: u8) -> i128 {
    pow10_i128_const(p)
}
6771
/// NOTE(review): the two paragraphs below read like documentation for
/// statement-rewrite helpers (clock substitution and subquery
/// materialisation) rather than for this `impl` block — confirm where
/// they belong.
///
/// Walk a parsed `Statement`, swapping any `NOW()` /
/// `CURRENT_TIMESTAMP()` / `CURRENT_DATE()` function calls for a
/// literal cast that wraps the engine's per-statement clock reading.
/// When `now_micros` is `None`, calls stay as-is and surface as
/// `unknown function` at eval time — this keeps the error path explicit.
///
/// v4.10: pre-walk the WHERE / projection / etc. of a SELECT and
/// replace every subquery node with a materialised literal. SPG
/// only supports uncorrelated subqueries — the inner SELECT does
/// not see outer-row columns, so the result is the same for every
/// outer row and can be evaluated once.
///
/// Returns the rewritten statement; the caller passes this to the
/// regular row-loop executor, which no longer sees Subquery nodes
/// in its tree.
6786impl Engine {
6787    /// v4.12 window executor. Implements `ROW_NUMBER` / `RANK` /
6788    /// `DENSE_RANK` and the partition-aware aggregates `SUM` /
6789    /// `AVG` / `COUNT` / `MIN` / `MAX`. The plan is:
6790    /// 1. Apply the WHERE filter.
6791    /// 2. For each unique `WindowFunction` node in the projection,
6792    ///    partition + sort, compute the per-row value.
6793    /// 3. Append the window values as synthetic columns (`__win_N`)
6794    ///    to the row schema.
6795    /// 4. Rewrite the projection to read those columns.
6796    /// 5. Hand off to the regular project / ORDER BY / LIMIT pipe.
6797    #[allow(
6798        clippy::too_many_lines,
6799        clippy::type_complexity,
6800        clippy::needless_range_loop
6801    )] // window-eval is one cohesive pipe; splitting fragments
6802    fn exec_select_with_window(
6803        &self,
6804        stmt: &SelectStatement,
6805        cancel: CancelToken<'_>,
6806    ) -> Result<QueryResult, EngineError> {
6807        let from = stmt.from.as_ref().ok_or_else(|| {
6808            EngineError::Unsupported("window functions require a FROM clause".into())
6809        })?;
6810        // For v4.12 we only support a single-table FROM. Joins +
6811        // windows is queued for v5.x.
6812        if !from.joins.is_empty() {
6813            return Err(EngineError::Unsupported(
6814                "JOIN with window functions not yet supported".into(),
6815            ));
6816        }
6817        let primary = &from.primary;
6818        let table = self.active_catalog().get(&primary.name).ok_or_else(|| {
6819            StorageError::TableNotFound {
6820                name: primary.name.clone(),
6821            }
6822        })?;
6823        let alias = primary.alias.as_deref().unwrap_or(primary.name.as_str());
6824        let schema_cols = &table.schema().columns;
6825        let ctx = self.ev_ctx(schema_cols, Some(alias));
6826
6827        // 1) Filter pass.
6828        let mut filtered: Vec<&Row> = Vec::new();
6829        for (i, row) in table.rows().iter().enumerate() {
6830            if i.is_multiple_of(256) {
6831                cancel.check()?;
6832            }
6833            if let Some(w) = &stmt.where_ {
6834                let cond = eval::eval_expr(w, row, &ctx)?;
6835                if !matches!(cond, Value::Bool(true)) {
6836                    continue;
6837                }
6838            }
6839            filtered.push(row);
6840        }
6841        let n_rows = filtered.len();
6842
6843        // 2) Collect unique window function nodes from projection.
6844        let mut window_nodes: Vec<Expr> = Vec::new();
6845        for item in &stmt.items {
6846            if let SelectItem::Expr { expr, .. } = item {
6847                collect_window_nodes(expr, &mut window_nodes);
6848            }
6849        }
6850
6851        // 3) For each window, compute per-row value.
6852        // Index: same order as window_nodes; for row i, win_vals[w][i].
6853        let mut win_vals: Vec<Vec<Value>> = Vec::with_capacity(window_nodes.len());
6854        for wnode in &window_nodes {
6855            let Expr::WindowFunction {
6856                name,
6857                args,
6858                partition_by,
6859                order_by,
6860                frame,
6861                null_treatment,
6862            } = wnode
6863            else {
6864                unreachable!("collect_window_nodes pushes only WindowFunction");
6865            };
6866            // Compute (partition_key, order_key, original_index) for each row.
6867            let mut indexed: Vec<(Vec<Value>, Vec<(Value, bool)>, usize)> =
6868                Vec::with_capacity(n_rows);
6869            for (i, row) in filtered.iter().enumerate() {
6870                let pkey: Vec<Value> = partition_by
6871                    .iter()
6872                    .map(|p| eval::eval_expr(p, row, &ctx))
6873                    .collect::<Result<_, _>>()?;
6874                let okey: Vec<(Value, bool)> = order_by
6875                    .iter()
6876                    .map(|(e, desc)| eval::eval_expr(e, row, &ctx).map(|v| (v, *desc)))
6877                    .collect::<Result<_, _>>()?;
6878                indexed.push((pkey, okey, i));
6879            }
6880            // Sort by (partition_key, order_key). Partition key uses
6881            // a stable encoded form; order key respects ASC/DESC.
6882            indexed.sort_by(|a, b| {
6883                let p_cmp = partition_key_cmp(&a.0, &b.0);
6884                if p_cmp != core::cmp::Ordering::Equal {
6885                    return p_cmp;
6886                }
6887                order_key_cmp(&a.1, &b.1)
6888            });
6889            // Per-partition compute.
6890            let mut out_vals: Vec<Value> = alloc::vec![Value::Null; n_rows];
6891            let mut p_start = 0;
6892            while p_start < indexed.len() {
6893                let mut p_end = p_start + 1;
6894                while p_end < indexed.len()
6895                    && partition_key_cmp(&indexed[p_start].0, &indexed[p_end].0)
6896                        == core::cmp::Ordering::Equal
6897                {
6898                    p_end += 1;
6899                }
6900                // Compute the function within this partition slice.
6901                compute_window_partition(
6902                    name,
6903                    args,
6904                    !order_by.is_empty(),
6905                    frame.as_ref(),
6906                    *null_treatment,
6907                    &indexed[p_start..p_end],
6908                    &filtered,
6909                    &ctx,
6910                    &mut out_vals,
6911                )?;
6912                p_start = p_end;
6913            }
6914            win_vals.push(out_vals);
6915        }
6916
6917        // 4) Build extended schema: original columns + synthetic.
6918        let mut ext_cols = schema_cols.clone();
6919        for i in 0..window_nodes.len() {
6920            ext_cols.push(ColumnSchema::new(
6921                alloc::format!("__win_{i}"),
6922                DataType::Text, // type doesn't matter for projection eval
6923                true,
6924            ));
6925        }
6926        // 5) Build extended rows: each row gets its window values appended.
6927        let mut ext_rows: Vec<Row> = Vec::with_capacity(n_rows);
6928        for i in 0..n_rows {
6929            let mut values = filtered[i].values.clone();
6930            for w in 0..window_nodes.len() {
6931                values.push(win_vals[w][i].clone());
6932            }
6933            ext_rows.push(Row::new(values));
6934        }
6935        // 6) Rewrite the projection: WindowFunction nodes → Column(__win_N).
6936        let mut rewritten_items: Vec<SelectItem> = Vec::with_capacity(stmt.items.len());
6937        for item in &stmt.items {
6938            let new_item = match item {
6939                SelectItem::Wildcard => SelectItem::Wildcard,
6940                SelectItem::Expr { expr, alias } => {
6941                    let mut e = expr.clone();
6942                    rewrite_window_to_columns(&mut e, &window_nodes);
6943                    SelectItem::Expr {
6944                        expr: e,
6945                        alias: alias.clone(),
6946                    }
6947                }
6948            };
6949            rewritten_items.push(new_item);
6950        }
6951
6952        // 7) Project into final rows.
6953        let ext_ctx = EvalContext::new(&ext_cols, Some(alias));
6954        let projection = build_projection(&rewritten_items, &ext_cols, alias)?;
6955        let mut tagged: Vec<(Vec<f64>, Row)> = Vec::with_capacity(n_rows);
6956        for (i, row) in ext_rows.iter().enumerate() {
6957            if i.is_multiple_of(256) {
6958                cancel.check()?;
6959            }
6960            let mut values = Vec::with_capacity(projection.len());
6961            for p in &projection {
6962                values.push(eval::eval_expr(&p.expr, row, &ext_ctx)?);
6963            }
6964            let order_keys = if stmt.order_by.is_empty() {
6965                Vec::new()
6966            } else {
6967                let mut keys = Vec::with_capacity(stmt.order_by.len());
6968                for o in &stmt.order_by {
6969                    let mut e = o.expr.clone();
6970                    rewrite_window_to_columns(&mut e, &window_nodes);
6971                    let key = eval::eval_expr(&e, row, &ext_ctx)?;
6972                    keys.push(value_to_order_key(&key)?);
6973                }
6974                keys
6975            };
6976            tagged.push((order_keys, Row::new(values)));
6977        }
6978        // ORDER BY + LIMIT/OFFSET on the projected rows.
6979        if !stmt.order_by.is_empty() {
6980            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
6981            sort_by_keys(&mut tagged, &descs);
6982        }
6983        let mut out_rows: Vec<Row> = tagged.into_iter().map(|(_, r)| r).collect();
6984        apply_offset_and_limit(&mut out_rows, stmt.offset_literal(), stmt.limit_literal());
6985        let final_cols: Vec<ColumnSchema> = projection
6986            .into_iter()
6987            .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
6988            .collect();
6989        Ok(QueryResult::Rows {
6990            columns: final_cols,
6991            rows: out_rows,
6992        })
6993    }
6994
6995    /// v4.11: materialise each CTE into a temp table inside a
6996    /// cloned catalog, then run the body SELECT against a fresh
6997    /// engine instance that owns the enriched catalog. The clone
6998    /// is moderately expensive — only paid by CTE-bearing queries.
6999    /// Subqueries inside CTE bodies / the main body resolve as
7000    /// usual; `clock_fn` is propagated so `NOW()` lines up.
7001    fn exec_with_ctes(
7002        &self,
7003        stmt: &SelectStatement,
7004        cancel: CancelToken<'_>,
7005    ) -> Result<QueryResult, EngineError> {
7006        cancel.check()?;
7007        let mut catalog = self.active_catalog().clone();
7008        for cte in &stmt.ctes {
7009            if catalog.get(&cte.name).is_some() {
7010                return Err(EngineError::Unsupported(alloc::format!(
7011                    "CTE name {:?} shadows an existing table; rename the CTE",
7012                    cte.name
7013                )));
7014            }
7015            let (columns, rows) = if cte.recursive {
7016                self.materialise_recursive_cte(cte, &catalog, cancel)?
7017            } else {
7018                let body_result = self.exec_select_cancel(&cte.body, cancel)?;
7019                let QueryResult::Rows { columns, rows } = body_result else {
7020                    return Err(EngineError::Unsupported(alloc::format!(
7021                        "CTE {:?} body did not return rows",
7022                        cte.name
7023                    )));
7024                };
7025                (columns, rows)
7026            };
7027            // v4.22: the projection builder labels any non-column
7028            // expression as Text — including literal SELECT 1.
7029            // Promote each column's type to whatever the rows
7030            // actually carry so the CTE storage table accepts them.
7031            let inferred = infer_column_types(&columns, &rows);
7032            let mut columns = inferred;
7033            // v4.22: apply optional `WITH name(a, b, c)` overrides.
7034            if !cte.column_overrides.is_empty() {
7035                if cte.column_overrides.len() != columns.len() {
7036                    return Err(EngineError::Unsupported(alloc::format!(
7037                        "CTE {:?} column list has {} names but body returns {} columns",
7038                        cte.name,
7039                        cte.column_overrides.len(),
7040                        columns.len()
7041                    )));
7042                }
7043                for (col, name) in columns.iter_mut().zip(cte.column_overrides.iter()) {
7044                    col.name.clone_from(name);
7045                }
7046            }
7047            let schema = TableSchema::new(cte.name.clone(), columns);
7048            catalog.create_table(schema).map_err(EngineError::Storage)?;
7049            let table = catalog
7050                .get_mut(&cte.name)
7051                .expect("just-created CTE table must exist");
7052            for row in rows {
7053                table.insert(row).map_err(EngineError::Storage)?;
7054            }
7055        }
7056        // Strip CTEs from the body before running on the temp engine
7057        // so we don't recurse forever.
7058        let mut body = stmt.clone();
7059        body.ctes = Vec::new();
7060        let mut temp = Engine::restore(catalog);
7061        if let Some(c) = self.clock {
7062            temp = temp.with_clock(c);
7063        }
7064        if let Some(f) = self.salt_fn {
7065            temp = temp.with_salt_fn(f);
7066        }
7067        temp.exec_select_cancel(&body, cancel)
7068    }
7069
7070    /// v4.22: materialise a WITH RECURSIVE CTE. The body must be a
7071    /// UNION (or UNION ALL) of an anchor that does not reference
7072    /// the CTE name, and one or more recursive terms that do. The
7073    /// anchor runs first; each subsequent iteration runs the
7074    /// recursive term against a temp catalog where the CTE name is
7075    /// bound to the *previous* iteration's output. Iteration stops
7076    /// when the recursive term yields no rows; UNION (DISTINCT)
7077    /// deduplicates against the accumulated result, UNION ALL does
7078    /// not. A hard cap on total rows prevents runaway queries.
7079    #[allow(clippy::too_many_lines)]
7080    fn materialise_recursive_cte(
7081        &self,
7082        cte: &spg_sql::ast::Cte,
7083        base_catalog: &Catalog,
7084        cancel: CancelToken<'_>,
7085    ) -> Result<(Vec<ColumnSchema>, Vec<Row>), EngineError> {
7086        const MAX_TOTAL_ROWS: usize = 1_000_000;
7087        const MAX_ITERATIONS: usize = 100_000;
7088        cancel.check()?;
7089        if cte.body.unions.is_empty() {
7090            return Err(EngineError::Unsupported(alloc::format!(
7091                "WITH RECURSIVE {:?} body must be a UNION of an anchor and a recursive term",
7092                cte.name
7093            )));
7094        }
7095        // Anchor: the body's leading SELECT, with unions stripped.
7096        let mut anchor = cte.body.clone();
7097        let union_terms = core::mem::take(&mut anchor.unions);
7098        anchor.ctes = Vec::new();
7099        // Anchor must not reference the CTE name.
7100        if select_refers_to(&anchor, &cte.name) {
7101            return Err(EngineError::Unsupported(alloc::format!(
7102                "WITH RECURSIVE {:?}: the anchor must not reference the CTE itself",
7103                cte.name
7104            )));
7105        }
7106        let anchor_result = self.exec_select_cancel(&anchor, cancel)?;
7107        let QueryResult::Rows {
7108            columns: anchor_cols,
7109            rows: anchor_rows,
7110        } = anchor_result
7111        else {
7112            return Err(EngineError::Unsupported(alloc::format!(
7113                "WITH RECURSIVE {:?}: anchor did not return rows",
7114                cte.name
7115            )));
7116        };
7117        // The projection builder labels non-column expressions Text;
7118        // refine column types from the anchor's actual values so the
7119        // intermediate iter-catalog tables accept them.
7120        let mut columns = infer_column_types(&anchor_cols, &anchor_rows);
7121        if !cte.column_overrides.is_empty() {
7122            if cte.column_overrides.len() != columns.len() {
7123                return Err(EngineError::Unsupported(alloc::format!(
7124                    "CTE {:?} column list has {} names but anchor returns {} columns",
7125                    cte.name,
7126                    cte.column_overrides.len(),
7127                    columns.len()
7128                )));
7129            }
7130            for (col, name) in columns.iter_mut().zip(cte.column_overrides.iter()) {
7131                col.name.clone_from(name);
7132            }
7133        }
7134        let mut all_rows: Vec<Row> = anchor_rows.clone();
7135        let mut working_set: Vec<Row> = anchor_rows;
7136        let mut seen: alloc::collections::BTreeSet<Vec<u8>> = alloc::collections::BTreeSet::new();
7137        // Track at least one "all UNION ALL" flag — if every union
7138        // kind is ALL we skip the dedup step (faster + matches PG).
7139        let all_union_all = union_terms.iter().all(|(k, _)| matches!(k, UnionKind::All));
7140        if !all_union_all {
7141            for r in &all_rows {
7142                seen.insert(encode_row_key(r));
7143            }
7144        }
7145        for iter in 0..MAX_ITERATIONS {
7146            cancel.check()?;
7147            if working_set.is_empty() {
7148                break;
7149            }
7150            // Build a fresh catalog: base + CTE bound to working_set.
7151            let mut iter_catalog = base_catalog.clone();
7152            let schema = TableSchema::new(cte.name.clone(), columns.clone());
7153            iter_catalog
7154                .create_table(schema)
7155                .map_err(EngineError::Storage)?;
7156            {
7157                let table = iter_catalog.get_mut(&cte.name).expect("just-created");
7158                for row in &working_set {
7159                    table.insert(row.clone()).map_err(EngineError::Storage)?;
7160                }
7161            }
7162            let mut iter_engine = Engine::restore(iter_catalog);
7163            if let Some(c) = self.clock {
7164                iter_engine = iter_engine.with_clock(c);
7165            }
7166            if let Some(f) = self.salt_fn {
7167                iter_engine = iter_engine.with_salt_fn(f);
7168            }
7169            // Run each recursive term in sequence and collect new rows.
7170            let mut next_set: Vec<Row> = Vec::new();
7171            for (_, term) in &union_terms {
7172                let mut term = term.clone();
7173                term.ctes = Vec::new();
7174                let r = iter_engine.exec_select_cancel(&term, cancel)?;
7175                let QueryResult::Rows {
7176                    columns: rc,
7177                    rows: rs,
7178                } = r
7179                else {
7180                    return Err(EngineError::Unsupported(alloc::format!(
7181                        "WITH RECURSIVE {:?}: recursive term did not return rows",
7182                        cte.name
7183                    )));
7184                };
7185                if rc.len() != columns.len() {
7186                    return Err(EngineError::Unsupported(alloc::format!(
7187                        "WITH RECURSIVE {:?}: column count of recursive term ({}) does not match anchor ({})",
7188                        cte.name,
7189                        rc.len(),
7190                        columns.len()
7191                    )));
7192                }
7193                for row in rs {
7194                    if !all_union_all {
7195                        let key = encode_row_key(&row);
7196                        if !seen.insert(key) {
7197                            continue;
7198                        }
7199                    }
7200                    next_set.push(row);
7201                }
7202            }
7203            if next_set.is_empty() {
7204                break;
7205            }
7206            all_rows.extend(next_set.iter().cloned());
7207            working_set = next_set;
7208            if all_rows.len() > MAX_TOTAL_ROWS {
7209                return Err(EngineError::Unsupported(alloc::format!(
7210                    "WITH RECURSIVE {:?}: produced more than {MAX_TOTAL_ROWS} rows — likely runaway recursion",
7211                    cte.name
7212                )));
7213            }
7214            if iter + 1 == MAX_ITERATIONS {
7215                return Err(EngineError::Unsupported(alloc::format!(
7216                    "WITH RECURSIVE {:?}: exceeded {MAX_ITERATIONS} iterations",
7217                    cte.name
7218                )));
7219            }
7220        }
7221        Ok((columns, all_rows))
7222    }
7223
7224    fn resolve_select_subqueries(
7225        &self,
7226        stmt: &mut SelectStatement,
7227        cancel: CancelToken<'_>,
7228    ) -> Result<(), EngineError> {
7229        for item in &mut stmt.items {
7230            if let SelectItem::Expr { expr, .. } = item {
7231                self.resolve_expr_subqueries(expr, cancel)?;
7232            }
7233        }
7234        if let Some(w) = &mut stmt.where_ {
7235            self.resolve_expr_subqueries(w, cancel)?;
7236        }
7237        if let Some(gs) = &mut stmt.group_by {
7238            for g in gs {
7239                self.resolve_expr_subqueries(g, cancel)?;
7240            }
7241        }
7242        if let Some(h) = &mut stmt.having {
7243            self.resolve_expr_subqueries(h, cancel)?;
7244        }
7245        for o in &mut stmt.order_by {
7246            self.resolve_expr_subqueries(&mut o.expr, cancel)?;
7247        }
7248        for (_, peer) in &mut stmt.unions {
7249            self.resolve_select_subqueries(peer, cancel)?;
7250        }
7251        Ok(())
7252    }
7253
7254    #[allow(clippy::only_used_in_recursion)] // engine handle reads aren't really pure
7255    fn resolve_expr_subqueries(
7256        &self,
7257        e: &mut Expr,
7258        cancel: CancelToken<'_>,
7259    ) -> Result<(), EngineError> {
7260        // Replace-on-this-node cases first.
7261        if let Some(replacement) = self.subquery_replacement(e, cancel)? {
7262            *e = replacement;
7263            return Ok(());
7264        }
7265        match e {
7266            Expr::Binary { lhs, rhs, .. } => {
7267                self.resolve_expr_subqueries(lhs, cancel)?;
7268                self.resolve_expr_subqueries(rhs, cancel)?;
7269            }
7270            Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
7271                self.resolve_expr_subqueries(expr, cancel)?;
7272            }
7273            Expr::FunctionCall { args, .. } => {
7274                for a in args {
7275                    self.resolve_expr_subqueries(a, cancel)?;
7276                }
7277            }
7278            Expr::Like { expr, pattern, .. } => {
7279                self.resolve_expr_subqueries(expr, cancel)?;
7280                self.resolve_expr_subqueries(pattern, cancel)?;
7281            }
7282            Expr::Extract { source, .. } => self.resolve_expr_subqueries(source, cancel)?,
7283            // v4.12 window functions — recurse into args + ORDER BY
7284            // + PARTITION BY in case they carry inner subqueries.
7285            Expr::WindowFunction {
7286                args,
7287                partition_by,
7288                order_by,
7289                ..
7290            } => {
7291                for a in args {
7292                    self.resolve_expr_subqueries(a, cancel)?;
7293                }
7294                for p in partition_by {
7295                    self.resolve_expr_subqueries(p, cancel)?;
7296                }
7297                for (e, _) in order_by {
7298                    self.resolve_expr_subqueries(e, cancel)?;
7299                }
7300            }
7301            // Subquery nodes are handled in subquery_replacement
7302            // (which returned None — defensive no-op); Literal /
7303            // Column are leaves.
7304            Expr::ScalarSubquery(_)
7305            | Expr::Exists { .. }
7306            | Expr::InSubquery { .. }
7307            | Expr::Literal(_)
7308            | Expr::Placeholder(_)
7309            | Expr::Column(_) => {}
7310            // v7.10.10 — recurse children.
7311            Expr::Array(items) => {
7312                for elem in items {
7313                    self.resolve_expr_subqueries(elem, cancel)?;
7314                }
7315            }
7316            Expr::ArraySubscript { target, index } => {
7317                self.resolve_expr_subqueries(target, cancel)?;
7318                self.resolve_expr_subqueries(index, cancel)?;
7319            }
7320            Expr::AnyAll { expr, array, .. } => {
7321                self.resolve_expr_subqueries(expr, cancel)?;
7322                self.resolve_expr_subqueries(array, cancel)?;
7323            }
7324            Expr::Case {
7325                operand,
7326                branches,
7327                else_branch,
7328            } => {
7329                if let Some(o) = operand {
7330                    self.resolve_expr_subqueries(o, cancel)?;
7331                }
7332                for (w, t) in branches {
7333                    self.resolve_expr_subqueries(w, cancel)?;
7334                    self.resolve_expr_subqueries(t, cancel)?;
7335                }
7336                if let Some(e) = else_branch {
7337                    self.resolve_expr_subqueries(e, cancel)?;
7338                }
7339            }
7340        }
7341        Ok(())
7342    }
7343
7344    /// v4.23: per-row eval that handles correlated subqueries.
7345    /// Equivalent to `eval::eval_expr` when the expression has no
7346    /// subqueries; otherwise clones the expression, substitutes
7347    /// outer-row columns into each surviving subquery node, runs
7348    /// the inner SELECT, and replaces the node with the literal
7349    /// result. Only the WHERE-filter call sites use this path so
7350    /// the uncorrelated fast path is preserved everywhere else.
7351    fn eval_expr_with_correlated(
7352        &self,
7353        expr: &Expr,
7354        row: &Row,
7355        ctx: &EvalContext<'_>,
7356        cancel: CancelToken<'_>,
7357        memo: Option<&mut memoize::MemoizeCache>,
7358    ) -> Result<Value, EngineError> {
7359        if !expr_has_subquery(expr) {
7360            return eval::eval_expr(expr, row, ctx).map_err(EngineError::Eval);
7361        }
7362        let mut e = expr.clone();
7363        self.resolve_correlated_in_expr(&mut e, row, ctx, cancel, memo)?;
7364        eval::eval_expr(&e, row, ctx).map_err(EngineError::Eval)
7365    }
7366
7367    fn resolve_correlated_in_expr(
7368        &self,
7369        e: &mut Expr,
7370        row: &Row,
7371        ctx: &EvalContext<'_>,
7372        cancel: CancelToken<'_>,
7373        mut memo: Option<&mut memoize::MemoizeCache>,
7374    ) -> Result<(), EngineError> {
7375        match e {
7376            Expr::ScalarSubquery(inner) => {
7377                // v6.2.6 — Memoize: build the cache key from the
7378                // pre-substitution subquery repr + the outer row's
7379                // values. Two outer rows with identical correlated
7380                // values hit the same entry.
7381                let cache_key = memo.as_ref().map(|_| memoize::CacheKey {
7382                    subquery_repr: alloc::format!("{}", **inner),
7383                    outer_values: row.values.clone(),
7384                });
7385                if let (Some(cache), Some(k)) = (memo.as_deref_mut(), cache_key.as_ref())
7386                    && let Some(cached) = cache.get(k)
7387                {
7388                    *e = value_to_literal_expr(cached)?;
7389                    return Ok(());
7390                }
7391                let mut s = (**inner).clone();
7392                substitute_outer_columns(&mut s, row, ctx);
7393                let r = self.exec_select_cancel(&s, cancel)?;
7394                let QueryResult::Rows { rows, .. } = r else {
7395                    return Err(EngineError::Unsupported(
7396                        "scalar subquery: inner did not return rows".into(),
7397                    ));
7398                };
7399                let value = match rows.as_slice() {
7400                    [] => Value::Null,
7401                    [r0] => r0.values.first().cloned().unwrap_or(Value::Null),
7402                    _ => {
7403                        return Err(EngineError::Unsupported(alloc::format!(
7404                            "scalar subquery returned {} rows; expected 0 or 1",
7405                            rows.len()
7406                        )));
7407                    }
7408                };
7409                if let (Some(cache), Some(k)) = (memo.as_deref_mut(), cache_key) {
7410                    cache.insert(k, value.clone());
7411                }
7412                *e = value_to_literal_expr(value)?;
7413            }
7414            Expr::Exists { subquery, negated } => {
7415                let mut s = (**subquery).clone();
7416                substitute_outer_columns(&mut s, row, ctx);
7417                let r = self.exec_select_cancel(&s, cancel)?;
7418                let exists = matches!(r, QueryResult::Rows { rows, .. } if !rows.is_empty());
7419                let bit = if *negated { !exists } else { exists };
7420                *e = Expr::Literal(Literal::Bool(bit));
7421            }
7422            Expr::InSubquery {
7423                expr: lhs,
7424                subquery,
7425                negated,
7426            } => {
7427                self.resolve_correlated_in_expr(lhs, row, ctx, cancel, memo.as_deref_mut())?;
7428                let lhs_val = eval::eval_expr(lhs, row, ctx).map_err(EngineError::Eval)?;
7429                let mut s = (**subquery).clone();
7430                substitute_outer_columns(&mut s, row, ctx);
7431                let r = self.exec_select_cancel(&s, cancel)?;
7432                let QueryResult::Rows { columns, rows, .. } = r else {
7433                    return Err(EngineError::Unsupported(
7434                        "IN-subquery: inner did not return rows".into(),
7435                    ));
7436                };
7437                if columns.len() != 1 {
7438                    return Err(EngineError::Unsupported(alloc::format!(
7439                        "IN-subquery must project exactly one column; got {}",
7440                        columns.len()
7441                    )));
7442                }
7443                let mut found = false;
7444                let mut any_null = false;
7445                for r0 in rows {
7446                    let v = r0.values.into_iter().next().unwrap_or(Value::Null);
7447                    if v.is_null() {
7448                        any_null = true;
7449                        continue;
7450                    }
7451                    if value_cmp(&v, &lhs_val) == core::cmp::Ordering::Equal {
7452                        found = true;
7453                        break;
7454                    }
7455                }
7456                let bit = if found {
7457                    !*negated
7458                } else if any_null {
7459                    return Err(EngineError::Unsupported(
7460                        "IN-subquery with NULL in result and no match: NULL semantics not yet implemented".into(),
7461                    ));
7462                } else {
7463                    *negated
7464                };
7465                *e = Expr::Literal(Literal::Bool(bit));
7466            }
7467            Expr::Binary { lhs, rhs, .. } => {
7468                self.resolve_correlated_in_expr(lhs, row, ctx, cancel, memo.as_deref_mut())?;
7469                self.resolve_correlated_in_expr(rhs, row, ctx, cancel, memo.as_deref_mut())?;
7470            }
7471            Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
7472                self.resolve_correlated_in_expr(expr, row, ctx, cancel, memo.as_deref_mut())?;
7473            }
7474            Expr::Like { expr, pattern, .. } => {
7475                self.resolve_correlated_in_expr(expr, row, ctx, cancel, memo.as_deref_mut())?;
7476                self.resolve_correlated_in_expr(pattern, row, ctx, cancel, memo.as_deref_mut())?;
7477            }
7478            Expr::FunctionCall { args, .. } => {
7479                for a in args {
7480                    self.resolve_correlated_in_expr(a, row, ctx, cancel, memo.as_deref_mut())?;
7481                }
7482            }
7483            Expr::Extract { source, .. } => {
7484                self.resolve_correlated_in_expr(source, row, ctx, cancel, memo.as_deref_mut())?;
7485            }
7486            Expr::WindowFunction { .. }
7487            | Expr::Literal(_)
7488            | Expr::Placeholder(_)
7489            | Expr::Column(_) => {}
7490            // v7.10.10 — recurse children.
7491            Expr::Array(items) => {
7492                for elem in items {
7493                    self.resolve_correlated_in_expr(elem, row, ctx, cancel, memo.as_deref_mut())?;
7494                }
7495            }
7496            Expr::ArraySubscript { target, index } => {
7497                self.resolve_correlated_in_expr(target, row, ctx, cancel, memo.as_deref_mut())?;
7498                self.resolve_correlated_in_expr(index, row, ctx, cancel, memo.as_deref_mut())?;
7499            }
7500            Expr::AnyAll { expr, array, .. } => {
7501                self.resolve_correlated_in_expr(expr, row, ctx, cancel, memo.as_deref_mut())?;
7502                self.resolve_correlated_in_expr(array, row, ctx, cancel, memo.as_deref_mut())?;
7503            }
7504            Expr::Case {
7505                operand,
7506                branches,
7507                else_branch,
7508            } => {
7509                if let Some(o) = operand {
7510                    self.resolve_correlated_in_expr(o, row, ctx, cancel, memo.as_deref_mut())?;
7511                }
7512                for (w, t) in branches {
7513                    self.resolve_correlated_in_expr(w, row, ctx, cancel, memo.as_deref_mut())?;
7514                    self.resolve_correlated_in_expr(t, row, ctx, cancel, memo.as_deref_mut())?;
7515                }
7516                if let Some(e) = else_branch {
7517                    self.resolve_correlated_in_expr(e, row, ctx, cancel, memo.as_deref_mut())?;
7518                }
7519            }
7520        }
7521        Ok(())
7522    }
7523
7524    fn subquery_replacement(
7525        &self,
7526        e: &Expr,
7527        cancel: CancelToken<'_>,
7528    ) -> Result<Option<Expr>, EngineError> {
7529        match e {
7530            Expr::ScalarSubquery(inner) => {
7531                let mut s = (**inner).clone();
7532                // Recurse into the inner SELECT first so nested
7533                // subqueries materialise bottom-up.
7534                self.resolve_select_subqueries(&mut s, cancel)?;
7535                let r = match self.exec_bare_select_cancel(&s, cancel) {
7536                    Ok(r) => r,
7537                    Err(e) if is_correlation_error(&e) => return Ok(None),
7538                    Err(e) => return Err(e),
7539                };
7540                let QueryResult::Rows { rows, .. } = r else {
7541                    return Err(EngineError::Unsupported(
7542                        "scalar subquery: inner statement did not return rows".into(),
7543                    ));
7544                };
7545                let value = match rows.as_slice() {
7546                    [] => Value::Null,
7547                    [row] => row.values.first().cloned().unwrap_or(Value::Null),
7548                    _ => {
7549                        return Err(EngineError::Unsupported(alloc::format!(
7550                            "scalar subquery returned {} rows; expected 0 or 1",
7551                            rows.len()
7552                        )));
7553                    }
7554                };
7555                Ok(Some(value_to_literal_expr(value)?))
7556            }
7557            Expr::Exists { subquery, negated } => {
7558                let mut s = (**subquery).clone();
7559                self.resolve_select_subqueries(&mut s, cancel)?;
7560                let r = match self.exec_bare_select_cancel(&s, cancel) {
7561                    Ok(r) => r,
7562                    Err(e) if is_correlation_error(&e) => return Ok(None),
7563                    Err(e) => return Err(e),
7564                };
7565                let exists = match r {
7566                    QueryResult::Rows { rows, .. } => !rows.is_empty(),
7567                    QueryResult::CommandOk { .. } => false,
7568                };
7569                let bit = if *negated { !exists } else { exists };
7570                Ok(Some(Expr::Literal(Literal::Bool(bit))))
7571            }
7572            Expr::InSubquery {
7573                expr,
7574                subquery,
7575                negated,
7576            } => {
7577                let mut s = (**subquery).clone();
7578                self.resolve_select_subqueries(&mut s, cancel)?;
7579                let r = match self.exec_bare_select_cancel(&s, cancel) {
7580                    Ok(r) => r,
7581                    Err(e) if is_correlation_error(&e) => return Ok(None),
7582                    Err(e) => return Err(e),
7583                };
7584                let QueryResult::Rows { columns, rows, .. } = r else {
7585                    return Err(EngineError::Unsupported(
7586                        "IN-subquery: inner statement did not return rows".into(),
7587                    ));
7588                };
7589                if columns.len() != 1 {
7590                    return Err(EngineError::Unsupported(alloc::format!(
7591                        "IN-subquery must project exactly one column; got {}",
7592                        columns.len()
7593                    )));
7594                }
7595                // Build the same OR-Eq chain the parse-time literal-list
7596                // path constructs, with each value lifted into a Literal.
7597                let mut acc: Option<Expr> = None;
7598                for row in rows {
7599                    let v = row.values.into_iter().next().unwrap_or(Value::Null);
7600                    let lit = value_to_literal_expr(v)?;
7601                    let cmp = Expr::Binary {
7602                        lhs: expr.clone(),
7603                        op: BinOp::Eq,
7604                        rhs: Box::new(lit),
7605                    };
7606                    acc = Some(match acc {
7607                        None => cmp,
7608                        Some(prev) => Expr::Binary {
7609                            lhs: Box::new(prev),
7610                            op: BinOp::Or,
7611                            rhs: Box::new(cmp),
7612                        },
7613                    });
7614                }
7615                let combined = acc.unwrap_or(Expr::Literal(Literal::Bool(false)));
7616                let final_expr = if *negated {
7617                    Expr::Unary {
7618                        op: UnOp::Not,
7619                        expr: Box::new(combined),
7620                    }
7621                } else {
7622                    combined
7623                };
7624                Ok(Some(final_expr))
7625            }
7626            _ => Ok(None),
7627        }
7628    }
7629}
7630
7631// ---- v4.12 window-function helpers ----
7632// The (partition-key, order-key, original-index) tuple shape used
7633// across these helpers is intrinsic to the planner. Factoring it
7634// into a typedef adds indirection without making the code clearer,
7635// so several lints are allowed inline on the affected functions
7636// rather than module-wide.
7637
7638/// v4.22: cheap structural scan for `FROM <name>` (qualified or
7639/// not) inside a SELECT — used to verify the anchor of a WITH
7640/// RECURSIVE CTE doesn't recurse into itself. Conservative: walks
7641/// FROM joins, subqueries, and unions.
7642fn select_refers_to(stmt: &SelectStatement, target: &str) -> bool {
7643    if let Some(from) = &stmt.from
7644        && from_refers_to(from, target)
7645    {
7646        return true;
7647    }
7648    for (_, peer) in &stmt.unions {
7649        if select_refers_to(peer, target) {
7650            return true;
7651        }
7652    }
7653    for item in &stmt.items {
7654        if let SelectItem::Expr { expr, .. } = item
7655            && expr_refers_to(expr, target)
7656        {
7657            return true;
7658        }
7659    }
7660    if let Some(w) = &stmt.where_
7661        && expr_refers_to(w, target)
7662    {
7663        return true;
7664    }
7665    false
7666}
7667
7668fn from_refers_to(from: &FromClause, target: &str) -> bool {
7669    if from.primary.name.eq_ignore_ascii_case(target) {
7670        return true;
7671    }
7672    from.joins
7673        .iter()
7674        .any(|j| j.table.name.eq_ignore_ascii_case(target))
7675}
7676
7677fn expr_refers_to(e: &Expr, target: &str) -> bool {
7678    match e {
7679        Expr::ScalarSubquery(s) => select_refers_to(s, target),
7680        Expr::Exists { subquery, .. } | Expr::InSubquery { subquery, .. } => {
7681            select_refers_to(subquery, target)
7682        }
7683        Expr::Binary { lhs, rhs, .. } => expr_refers_to(lhs, target) || expr_refers_to(rhs, target),
7684        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
7685            expr_refers_to(expr, target)
7686        }
7687        Expr::Like { expr, pattern, .. } => {
7688            expr_refers_to(expr, target) || expr_refers_to(pattern, target)
7689        }
7690        Expr::FunctionCall { args, .. } => args.iter().any(|a| expr_refers_to(a, target)),
7691        Expr::Extract { source, .. } => expr_refers_to(source, target),
7692        Expr::WindowFunction {
7693            args,
7694            partition_by,
7695            order_by,
7696            ..
7697        } => {
7698            args.iter().any(|a| expr_refers_to(a, target))
7699                || partition_by.iter().any(|p| expr_refers_to(p, target))
7700                || order_by.iter().any(|(o, _)| expr_refers_to(o, target))
7701        }
7702        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => false,
7703        Expr::Array(items) => items.iter().any(|e| expr_refers_to(e, target)),
7704        Expr::ArraySubscript { target: t, index } => {
7705            expr_refers_to(t, target) || expr_refers_to(index, target)
7706        }
7707        Expr::AnyAll { expr, array, .. } => {
7708            expr_refers_to(expr, target) || expr_refers_to(array, target)
7709        }
7710        Expr::Case {
7711            operand,
7712            branches,
7713            else_branch,
7714        } => {
7715            operand.as_deref().is_some_and(|o| expr_refers_to(o, target))
7716                || branches
7717                    .iter()
7718                    .any(|(w, t)| expr_refers_to(w, target) || expr_refers_to(t, target))
7719                || else_branch
7720                    .as_deref()
7721                    .is_some_and(|e| expr_refers_to(e, target))
7722        }
7723    }
7724}
7725
7726/// v4.22: pick more specific column types from observed rows when
7727/// the projection builder defaulted to Text (the v1.x behavior for
7728/// non-column expressions). Lets `WITH t(n) AS (SELECT 1 ...)`
7729/// land an Int column in the CTE storage table rather than failing
7730/// the insert with "expected TEXT, got INT".
7731fn infer_column_types(columns: &[ColumnSchema], rows: &[Row]) -> Vec<ColumnSchema> {
7732    let mut out = columns.to_vec();
7733    for (col_idx, col) in out.iter_mut().enumerate() {
7734        if col.ty != DataType::Text {
7735            continue;
7736        }
7737        let mut inferred: Option<DataType> = None;
7738        let mut all_null = true;
7739        for row in rows {
7740            let Some(v) = row.values.get(col_idx) else {
7741                continue;
7742            };
7743            let ty = match v {
7744                Value::Null => continue,
7745                Value::SmallInt(_) => DataType::SmallInt,
7746                Value::Int(_) => DataType::Int,
7747                Value::BigInt(_) => DataType::BigInt,
7748                Value::Float(_) => DataType::Float,
7749                Value::Bool(_) => DataType::Bool,
7750                Value::Vector(_) => DataType::Vector {
7751                    dim: 0,
7752                    encoding: VecEncoding::F32,
7753                },
7754                _ => DataType::Text,
7755            };
7756            all_null = false;
7757            inferred = Some(match inferred {
7758                None => ty,
7759                Some(prev) if prev == ty => prev,
7760                Some(_) => DataType::Text,
7761            });
7762        }
7763        if let Some(t) = inferred {
7764            col.ty = t;
7765            col.nullable = true;
7766        } else if all_null {
7767            col.nullable = true;
7768        }
7769    }
7770    out
7771}
7772
7773/// v4.26: render a human-readable plan tree for `EXPLAIN <select>`.
7774/// Lines are pushed into `out`; `depth` controls indentation. We
7775/// describe the rewritten SELECT — what the executor *would* do —
7776/// using the engine handle to spot indexed lookups and table shapes.
7777#[allow(clippy::too_many_lines, clippy::format_push_string)]
7778/// v6.2.4 — Walk every line of the rendered plan tree and append
7779/// per-operator stats. Lines that name a known operator get
7780/// `(rows=N)` (`actual_rows` of the top-level operator equals the
7781/// final result row count; scans report their catalog row count
7782/// as the rows-considered metric). Other lines — Filter / Join /
7783/// GroupBy / OrderBy etc. — are marked `(—)` so the surface is
7784/// complete-by-construction; v6.2.5 fills these in via inline
7785/// executor counters.
7786/// v6.8.3 — surface "CREATE INDEX …" suggestions for every
7787/// `(table, column)` pair the query touches via WHERE / JOIN
7788/// that doesn't already have an index on the owning table.
7789/// Walks the SELECT's FROM clauses + WHERE expression tree;
7790/// returns one line per missing index. Deterministic order:
7791/// FROM-clause iteration order, then column-reference walk
7792/// order inside each WHERE. Each suggestion is a copy-pastable
7793/// DDL string.
7794fn build_index_suggestions(stmt: &SelectStatement, engine: &Engine) -> Vec<String> {
7795    use alloc::collections::BTreeSet;
7796    let mut seen: BTreeSet<(String, String)> = BTreeSet::new();
7797    let mut out: Vec<String> = Vec::new();
7798    let cat = engine.active_catalog();
7799    // Build a (table, qualifier-or-alias) list from the FROM clause
7800    // so unqualified column refs in WHERE resolve to the correct
7801    // table.
7802    let Some(from) = &stmt.from else {
7803        return out;
7804    };
7805    let mut tables: Vec<String> = Vec::new();
7806    tables.push(from.primary.name.clone());
7807    for j in &from.joins {
7808        tables.push(j.table.name.clone());
7809    }
7810    // Collect column refs from the WHERE expression. JOIN ON
7811    // predicates also feed in.
7812    let mut col_refs: Vec<spg_sql::ast::ColumnName> = Vec::new();
7813    if let Some(w) = &stmt.where_ {
7814        collect_column_refs(w, &mut col_refs);
7815    }
7816    for j in &from.joins {
7817        if let Some(on) = &j.on {
7818            collect_column_refs(on, &mut col_refs);
7819        }
7820    }
7821    for cn in &col_refs {
7822        // Resolve owner table: explicit qualifier first, else
7823        // first table in FROM that has a column of this name.
7824        let owner: Option<String> = if let Some(q) = &cn.qualifier {
7825            tables.iter().find(|t| t == &q).cloned()
7826        } else {
7827            tables.iter().find_map(|t| {
7828                cat.get(t).and_then(|tbl| {
7829                    if tbl.schema().column_position(&cn.name).is_some() {
7830                        Some(t.clone())
7831                    } else {
7832                        None
7833                    }
7834                })
7835            })
7836        };
7837        let Some(owner) = owner else {
7838            continue;
7839        };
7840        let Some(tbl) = cat.get(&owner) else {
7841            continue;
7842        };
7843        let Some(col_pos) = tbl.schema().column_position(&cn.name) else {
7844            continue;
7845        };
7846        // Skip if any BTree index already covers this column as
7847        // its key.
7848        let already_indexed = tbl.indices().iter().any(|i| {
7849            matches!(i.kind, spg_storage::IndexKind::BTree(_))
7850                && i.column_position == col_pos
7851                && i.expression.is_none()
7852                && i.partial_predicate.is_none()
7853        });
7854        if already_indexed {
7855            continue;
7856        }
7857        if seen.insert((owner.clone(), cn.name.clone())) {
7858            out.push(alloc::format!(
7859                "SUGGEST: CREATE INDEX ix_{}_{} ON {} ({})",
7860                owner,
7861                cn.name,
7862                owner,
7863                cn.name
7864            ));
7865        }
7866    }
7867    out
7868}
7869
7870/// Walks an `Expr` and pushes every `ColumnName` it references.
7871/// Order is depth-first, left-to-right.
7872fn collect_column_refs(expr: &Expr, out: &mut Vec<spg_sql::ast::ColumnName>) {
7873    match expr {
7874        Expr::Column(cn) => out.push(cn.clone()),
7875        Expr::FunctionCall { args, .. } => {
7876            for a in args {
7877                collect_column_refs(a, out);
7878            }
7879        }
7880        Expr::Binary { lhs, rhs, .. } => {
7881            collect_column_refs(lhs, out);
7882            collect_column_refs(rhs, out);
7883        }
7884        Expr::Unary { expr: e, .. } => collect_column_refs(e, out),
7885        _ => {}
7886    }
7887}
7888
7889fn annotate_explain_lines(lines: &mut [String], total_rows: usize, engine: &Engine) {
7890    let catalog = engine.active_catalog();
7891    let cold_ids = catalog.cold_segment_ids_global();
7892    let any_cold = !cold_ids.is_empty();
7893    let cold_ids_repr = if any_cold {
7894        let mut s = alloc::string::String::from("[");
7895        for (i, id) in cold_ids.iter().enumerate() {
7896            if i > 0 {
7897                s.push(',');
7898            }
7899            s.push_str(&alloc::format!("{id}"));
7900        }
7901        s.push(']');
7902        s
7903    } else {
7904        alloc::string::String::new()
7905    };
7906    for (idx, line) in lines.iter_mut().enumerate() {
7907        let trimmed = line.trim_start();
7908        let is_top_level = idx == 0;
7909        if is_top_level {
7910            line.push_str(&alloc::format!(" (rows={total_rows})"));
7911            continue;
7912        }
7913        if let Some(rest) = trimmed.strip_prefix("From: ") {
7914            let (name, scan_kind) = match rest.split_once(" [") {
7915                Some((n, k)) => (n.trim(), k.trim_end_matches(']')),
7916                None => (rest.trim(), ""),
7917            };
7918            let bare = name.split_whitespace().next().unwrap_or(name);
7919            let hot = catalog.get(bare).map(|t| t.rows().len());
7920            // v6.2.7 — `cold_segments=[id0,id1,…]` enumerates every
7921            // cold-tier segment the scan COULD have walked. v6.2.x
7922            // can tighten to per-table by walking the table's
7923            // BTree-index cold locators.
7924            let annot = match (hot, scan_kind) {
7925                (Some(h), "full scan") => {
7926                    let mut s = alloc::format!(" (hot_rows={h}");
7927                    if any_cold {
7928                        s.push_str(&alloc::format!(
7929                            ", cold_tier=present, cold_segments={cold_ids_repr}"
7930                        ));
7931                    }
7932                    s.push(')');
7933                    s
7934                }
7935                (Some(h), "index seek") => {
7936                    let mut s = alloc::format!(" (hot_rows≤{h}");
7937                    if any_cold {
7938                        s.push_str(&alloc::format!(
7939                            ", cold_tier=present, cold_segments={cold_ids_repr}"
7940                        ));
7941                    }
7942                    s.push(')');
7943                    s
7944                }
7945                _ => " (rows=—)".to_string(),
7946            };
7947            line.push_str(&annot);
7948            continue;
7949        }
7950        // Filter / GroupBy / Having / OrderBy / Limit / Join etc.
7951        line.push_str(" (rows=—)");
7952    }
7953}
7954
7955fn explain_select(stmt: &SelectStatement, engine: &Engine, depth: usize, out: &mut Vec<String>) {
7956    let pad = "  ".repeat(depth);
7957    // 1) Top-level operator label.
7958    let top = if !stmt.ctes.is_empty() {
7959        if stmt.ctes.iter().any(|c| c.recursive) {
7960            "CTEScan (WITH RECURSIVE)"
7961        } else {
7962            "CTEScan (WITH)"
7963        }
7964    } else if !stmt.unions.is_empty() {
7965        "UnionScan"
7966    } else if select_has_window(stmt) {
7967        "WindowAgg"
7968    } else if aggregate::uses_aggregate(stmt) {
7969        "Aggregate"
7970    } else if stmt.distinct {
7971        "Distinct"
7972    } else if stmt.from.is_some() {
7973        "TableScan"
7974    } else {
7975        "Result"
7976    };
7977    out.push(alloc::format!("{pad}{top}"));
7978    let child = "  ".repeat(depth + 1);
7979    // 2) CTE bodies.
7980    for cte in &stmt.ctes {
7981        let head = if cte.recursive {
7982            alloc::format!("{child}CTE (recursive): {}", cte.name)
7983        } else {
7984            alloc::format!("{child}CTE: {}", cte.name)
7985        };
7986        out.push(head);
7987        explain_select(&cte.body, engine, depth + 2, out);
7988    }
7989    // 3) FROM details — primary table + joins, index hits.
7990    if let Some(from) = &stmt.from {
7991        let mut tag = alloc::format!("{child}From: {}", from.primary.name);
7992        if let Some(alias) = &from.primary.alias {
7993            tag.push_str(&alloc::format!(" AS {alias}"));
7994        }
7995        // Try to detect an index-seek opportunity on WHERE against
7996        // the primary table — same heuristic the executor uses.
7997        if let Some(w) = &stmt.where_
7998            && let Some(table) = engine.active_catalog().get(&from.primary.name)
7999        {
8000            let alias = from.primary.alias.as_deref().unwrap_or(&from.primary.name);
8001            let cols = &table.schema().columns;
8002            if try_index_seek(w, cols, engine.active_catalog(), table, alias).is_some() {
8003                tag.push_str(" [index seek]");
8004            } else {
8005                tag.push_str(" [full scan]");
8006            }
8007        } else {
8008            tag.push_str(" [full scan]");
8009        }
8010        out.push(tag);
8011        for j in &from.joins {
8012            let kind = match j.kind {
8013                spg_sql::ast::JoinKind::Inner => "INNER JOIN",
8014                spg_sql::ast::JoinKind::Left => "LEFT JOIN",
8015                spg_sql::ast::JoinKind::Cross => "CROSS JOIN",
8016            };
8017            let mut s = alloc::format!("{child}{kind}: {}", j.table.name);
8018            if let Some(alias) = &j.table.alias {
8019                s.push_str(&alloc::format!(" AS {alias}"));
8020            }
8021            if j.on.is_some() {
8022                s.push_str(" (ON …)");
8023            }
8024            out.push(s);
8025        }
8026    }
8027    // 4) WHERE / GROUP BY / HAVING / ORDER BY / LIMIT / OFFSET.
8028    if let Some(w) = &stmt.where_ {
8029        let mut s = alloc::format!("{child}Filter: {w}");
8030        if expr_has_subquery(w) {
8031            s.push_str(" [subquery]");
8032        }
8033        out.push(s);
8034    }
8035    if let Some(gs) = &stmt.group_by {
8036        let mut parts = Vec::new();
8037        for g in gs {
8038            parts.push(alloc::format!("{g}"));
8039        }
8040        out.push(alloc::format!("{child}GroupBy: {}", parts.join(", ")));
8041    }
8042    if let Some(h) = &stmt.having {
8043        out.push(alloc::format!("{child}Having: {h}"));
8044    }
8045    for o in &stmt.order_by {
8046        let dir = if o.desc { "DESC" } else { "ASC" };
8047        out.push(alloc::format!("{child}OrderBy: {} {dir}", o.expr));
8048    }
8049    if let Some(lim) = stmt.limit {
8050        out.push(alloc::format!("{child}Limit: {lim}"));
8051    }
8052    if let Some(off) = stmt.offset {
8053        out.push(alloc::format!("{child}Offset: {off}"));
8054    }
8055    // 5) Projection — collapse Wildcard or render N items.
8056    if stmt
8057        .items
8058        .iter()
8059        .any(|it| matches!(it, SelectItem::Wildcard))
8060    {
8061        out.push(alloc::format!("{child}Project: *"));
8062    } else {
8063        out.push(alloc::format!(
8064            "{child}Project: {} item(s)",
8065            stmt.items.len()
8066        ));
8067    }
8068    // 6) Recurse into UNION peers.
8069    for (kind, peer) in &stmt.unions {
8070        let label = match kind {
8071            UnionKind::All => "UNION ALL",
8072            UnionKind::Distinct => "UNION",
8073        };
8074        out.push(alloc::format!("{child}{label}"));
8075        explain_select(peer, engine, depth + 2, out);
8076    }
8077}
8078
8079/// v4.23: recognise the engine errors that indicate the inner
8080/// SELECT couldn't be evaluated in isolation because it references
8081/// an outer column — used by `subquery_replacement` to skip
8082/// materialisation and let row-eval handle it instead.
8083fn is_correlation_error(e: &EngineError) -> bool {
8084    matches!(
8085        e,
8086        EngineError::Eval(
8087            eval::EvalError::ColumnNotFound { .. } | eval::EvalError::UnknownQualifier { .. }
8088        )
8089    )
8090}
8091
8092/// v4.23: walk every Expr in `stmt` and replace each Column ref
8093/// that targets the outer scope (qualifier matches the outer
8094/// table alias) with a Literal carrying the outer row's value.
8095/// Conservative: only qualified refs are substituted, so the user
8096/// must write `outer_alias.col` to reference an outer column. This
8097/// matches PG's lexical scoping for correlated subqueries and
8098/// avoids accidentally rebinding inner columns of the same name.
8099fn substitute_outer_columns(stmt: &mut SelectStatement, row: &Row, ctx: &EvalContext<'_>) {
8100    let Some(outer_alias) = ctx.table_alias else {
8101        return;
8102    };
8103    substitute_in_select(stmt, row, ctx, outer_alias);
8104}
8105
8106fn substitute_in_select(
8107    stmt: &mut SelectStatement,
8108    row: &Row,
8109    ctx: &EvalContext<'_>,
8110    outer_alias: &str,
8111) {
8112    for item in &mut stmt.items {
8113        if let SelectItem::Expr { expr, .. } = item {
8114            substitute_in_expr(expr, row, ctx, outer_alias);
8115        }
8116    }
8117    if let Some(w) = &mut stmt.where_ {
8118        substitute_in_expr(w, row, ctx, outer_alias);
8119    }
8120    if let Some(gs) = &mut stmt.group_by {
8121        for g in gs {
8122            substitute_in_expr(g, row, ctx, outer_alias);
8123        }
8124    }
8125    if let Some(h) = &mut stmt.having {
8126        substitute_in_expr(h, row, ctx, outer_alias);
8127    }
8128    for o in &mut stmt.order_by {
8129        substitute_in_expr(&mut o.expr, row, ctx, outer_alias);
8130    }
8131    for (_, peer) in &mut stmt.unions {
8132        substitute_in_select(peer, row, ctx, outer_alias);
8133    }
8134}
8135
8136fn substitute_in_expr(e: &mut Expr, row: &Row, ctx: &EvalContext<'_>, outer_alias: &str) {
8137    if let Expr::Column(c) = e
8138        && let Some(qual) = &c.qualifier
8139        && qual.eq_ignore_ascii_case(outer_alias)
8140    {
8141        // Look up the column's index in the outer schema.
8142        if let Some(idx) = ctx
8143            .columns
8144            .iter()
8145            .position(|sc| sc.name.eq_ignore_ascii_case(&c.name))
8146        {
8147            let v = row.values.get(idx).cloned().unwrap_or(Value::Null);
8148            if let Ok(lit) = value_to_literal_expr(v) {
8149                *e = lit;
8150                return;
8151            }
8152        }
8153    }
8154    match e {
8155        Expr::Binary { lhs, rhs, .. } => {
8156            substitute_in_expr(lhs, row, ctx, outer_alias);
8157            substitute_in_expr(rhs, row, ctx, outer_alias);
8158        }
8159        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
8160            substitute_in_expr(expr, row, ctx, outer_alias);
8161        }
8162        Expr::Like { expr, pattern, .. } => {
8163            substitute_in_expr(expr, row, ctx, outer_alias);
8164            substitute_in_expr(pattern, row, ctx, outer_alias);
8165        }
8166        Expr::FunctionCall { args, .. } => {
8167            for a in args {
8168                substitute_in_expr(a, row, ctx, outer_alias);
8169            }
8170        }
8171        Expr::Extract { source, .. } => substitute_in_expr(source, row, ctx, outer_alias),
8172        Expr::WindowFunction {
8173            args,
8174            partition_by,
8175            order_by,
8176            ..
8177        } => {
8178            for a in args {
8179                substitute_in_expr(a, row, ctx, outer_alias);
8180            }
8181            for p in partition_by {
8182                substitute_in_expr(p, row, ctx, outer_alias);
8183            }
8184            for (o, _) in order_by {
8185                substitute_in_expr(o, row, ctx, outer_alias);
8186            }
8187        }
8188        Expr::ScalarSubquery(s) => substitute_in_select(s, row, ctx, outer_alias),
8189        Expr::Exists { subquery, .. } | Expr::InSubquery { subquery, .. } => {
8190            substitute_in_select(subquery, row, ctx, outer_alias);
8191        }
8192        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => {}
8193        Expr::Array(items) => {
8194            for elem in items {
8195                substitute_in_expr(elem, row, ctx, outer_alias);
8196            }
8197        }
8198        Expr::ArraySubscript { target, index } => {
8199            substitute_in_expr(target, row, ctx, outer_alias);
8200            substitute_in_expr(index, row, ctx, outer_alias);
8201        }
8202        Expr::AnyAll { expr, array, .. } => {
8203            substitute_in_expr(expr, row, ctx, outer_alias);
8204            substitute_in_expr(array, row, ctx, outer_alias);
8205        }
8206        Expr::Case {
8207            operand,
8208            branches,
8209            else_branch,
8210        } => {
8211            if let Some(o) = operand {
8212                substitute_in_expr(o, row, ctx, outer_alias);
8213            }
8214            for (w, t) in branches {
8215                substitute_in_expr(w, row, ctx, outer_alias);
8216                substitute_in_expr(t, row, ctx, outer_alias);
8217            }
8218            if let Some(e) = else_branch {
8219                substitute_in_expr(e, row, ctx, outer_alias);
8220            }
8221        }
8222    }
8223}
8224
8225/// v4.22: encode a Row to a comparable byte key for UNION-DISTINCT
8226/// dedup inside the recursive iteration. Crude but deterministic
8227/// — Debug prints embed type discriminants so NULL ≠ "" ≠ 0.
8228fn encode_row_key(row: &Row) -> Vec<u8> {
8229    let mut out = Vec::new();
8230    for v in &row.values {
8231        let s = alloc::format!("{v:?}|");
8232        out.extend_from_slice(s.as_bytes());
8233    }
8234    out
8235}
8236
8237fn select_has_window(stmt: &SelectStatement) -> bool {
8238    for item in &stmt.items {
8239        if let SelectItem::Expr { expr, .. } = item
8240            && expr_has_window(expr)
8241        {
8242            return true;
8243        }
8244    }
8245    false
8246}
8247
8248fn expr_has_window(e: &Expr) -> bool {
8249    match e {
8250        Expr::WindowFunction { .. } => true,
8251        Expr::Binary { lhs, rhs, .. } => expr_has_window(lhs) || expr_has_window(rhs),
8252        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
8253            expr_has_window(expr)
8254        }
8255        Expr::FunctionCall { args, .. } => args.iter().any(expr_has_window),
8256        Expr::Like { expr, pattern, .. } => expr_has_window(expr) || expr_has_window(pattern),
8257        Expr::Extract { source, .. } => expr_has_window(source),
8258        Expr::ScalarSubquery(_)
8259        | Expr::Exists { .. }
8260        | Expr::InSubquery { .. }
8261        | Expr::Literal(_)
8262        | Expr::Placeholder(_)
8263        | Expr::Column(_) => false,
8264        Expr::Array(items) => items.iter().any(expr_has_window),
8265        Expr::ArraySubscript { target, index } => expr_has_window(target) || expr_has_window(index),
8266        Expr::AnyAll { expr, array, .. } => expr_has_window(expr) || expr_has_window(array),
8267        Expr::Case {
8268            operand,
8269            branches,
8270            else_branch,
8271        } => {
8272            operand.as_deref().is_some_and(expr_has_window)
8273                || branches
8274                    .iter()
8275                    .any(|(w, t)| expr_has_window(w) || expr_has_window(t))
8276                || else_branch.as_deref().is_some_and(expr_has_window)
8277        }
8278    }
8279}
8280
8281fn collect_window_nodes(e: &Expr, out: &mut Vec<Expr>) {
8282    if let Expr::WindowFunction { .. } = e {
8283        // Deduplicate by structural equality on the expression
8284        // (cheap because window args + partition + order are
8285        // small). Without dedup we'd recompute identical windows
8286        // once per occurrence in the projection.
8287        if !out.iter().any(|x| x == e) {
8288            out.push(e.clone());
8289        }
8290        return;
8291    }
8292    match e {
8293        // Already handled by the early-return at the top.
8294        Expr::WindowFunction { .. } => unreachable!(),
8295        Expr::Binary { lhs, rhs, .. } => {
8296            collect_window_nodes(lhs, out);
8297            collect_window_nodes(rhs, out);
8298        }
8299        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
8300            collect_window_nodes(expr, out);
8301        }
8302        Expr::FunctionCall { args, .. } => {
8303            for a in args {
8304                collect_window_nodes(a, out);
8305            }
8306        }
8307        Expr::Like { expr, pattern, .. } => {
8308            collect_window_nodes(expr, out);
8309            collect_window_nodes(pattern, out);
8310        }
8311        Expr::Extract { source, .. } => collect_window_nodes(source, out),
8312        _ => {}
8313    }
8314}
8315
8316fn rewrite_window_to_columns(e: &mut Expr, window_nodes: &[Expr]) {
8317    if let Expr::WindowFunction { .. } = e
8318        && let Some(idx) = window_nodes.iter().position(|w| w == e)
8319    {
8320        *e = Expr::Column(spg_sql::ast::ColumnName {
8321            qualifier: None,
8322            name: alloc::format!("__win_{idx}"),
8323        });
8324        return;
8325    }
8326    match e {
8327        Expr::Binary { lhs, rhs, .. } => {
8328            rewrite_window_to_columns(lhs, window_nodes);
8329            rewrite_window_to_columns(rhs, window_nodes);
8330        }
8331        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
8332            rewrite_window_to_columns(expr, window_nodes);
8333        }
8334        Expr::FunctionCall { args, .. } => {
8335            for a in args {
8336                rewrite_window_to_columns(a, window_nodes);
8337            }
8338        }
8339        Expr::Like { expr, pattern, .. } => {
8340            rewrite_window_to_columns(expr, window_nodes);
8341            rewrite_window_to_columns(pattern, window_nodes);
8342        }
8343        Expr::Extract { source, .. } => rewrite_window_to_columns(source, window_nodes),
8344        _ => {}
8345    }
8346}
8347
8348/// Total order over partition-key tuples. NULL sorts as the
8349/// lowest value (matches the `<` partial order's NULL-last
8350/// behaviour with `INFINITY` flipped).
8351fn partition_key_cmp(a: &[Value], b: &[Value]) -> core::cmp::Ordering {
8352    for (x, y) in a.iter().zip(b.iter()) {
8353        let c = value_cmp(x, y);
8354        if c != core::cmp::Ordering::Equal {
8355            return c;
8356        }
8357    }
8358    a.len().cmp(&b.len())
8359}
8360
8361fn order_key_cmp(a: &[(Value, bool)], b: &[(Value, bool)]) -> core::cmp::Ordering {
8362    for ((va, desc), (vb, _)) in a.iter().zip(b.iter()) {
8363        let c = value_cmp(va, vb);
8364        let c = if *desc { c.reverse() } else { c };
8365        if c != core::cmp::Ordering::Equal {
8366            return c;
8367        }
8368    }
8369    a.len().cmp(&b.len())
8370}
8371
8372#[allow(clippy::match_same_arms)] // explicit arms per type document the supported pairs
8373fn value_cmp(a: &Value, b: &Value) -> core::cmp::Ordering {
8374    use core::cmp::Ordering;
8375    match (a, b) {
8376        (Value::Null, Value::Null) => Ordering::Equal,
8377        (Value::Null, _) => Ordering::Less,
8378        (_, Value::Null) => Ordering::Greater,
8379        (Value::Int(x), Value::Int(y)) => x.cmp(y),
8380        (Value::BigInt(x), Value::BigInt(y)) => x.cmp(y),
8381        (Value::SmallInt(x), Value::SmallInt(y)) => x.cmp(y),
8382        (Value::Text(x), Value::Text(y)) => x.cmp(y),
8383        (Value::Bool(x), Value::Bool(y)) => x.cmp(y),
8384        (Value::Float(x), Value::Float(y)) => x.partial_cmp(y).unwrap_or(Ordering::Equal),
8385        (Value::Date(x), Value::Date(y)) => x.cmp(y),
8386        (Value::Timestamp(x), Value::Timestamp(y)) => x.cmp(y),
8387        // Cross-type compare: fall back to the debug rendering —
8388        // same-partition is the goal, exact order is irrelevant.
8389        _ => alloc::format!("{a:?}").cmp(&alloc::format!("{b:?}")),
8390    }
8391}
8392
8393/// Compute the window function's per-row output for one partition.
8394/// `slice` has (partition key, order key, original-row-index)
8395/// tuples already sorted by order key. `filtered_rows` is the
8396/// full row list indexed by original-row-index. `out_vals` is
8397/// the destination, also indexed by original-row-index.
8398#[allow(
8399    clippy::too_many_arguments,
8400    clippy::cast_possible_truncation,
8401    clippy::cast_possible_wrap,
8402    clippy::cast_precision_loss,
8403    clippy::cast_sign_loss,
8404    clippy::doc_markdown,
8405    clippy::too_many_lines,
8406    clippy::type_complexity,
8407    clippy::match_same_arms
8408)]
8409fn compute_window_partition(
8410    name: &str,
8411    args: &[Expr],
8412    ordered: bool,
8413    frame: Option<&WindowFrame>,
8414    null_treatment: spg_sql::ast::NullTreatment,
8415    slice: &[(Vec<Value>, Vec<(Value, bool)>, usize)],
8416    filtered_rows: &[&Row],
8417    ctx: &EvalContext<'_>,
8418    out_vals: &mut [Value],
8419) -> Result<(), EngineError> {
8420    let ignore_nulls = matches!(null_treatment, spg_sql::ast::NullTreatment::Ignore);
8421    let lower = name.to_ascii_lowercase();
8422    match lower.as_str() {
8423        "row_number" => {
8424            for (rank, (_, _, idx)) in slice.iter().enumerate() {
8425                out_vals[*idx] = Value::BigInt((rank + 1) as i64);
8426            }
8427            Ok(())
8428        }
8429        "rank" => {
8430            let mut prev_key: Option<&[(Value, bool)]> = None;
8431            let mut current_rank: i64 = 1;
8432            for (i, (_, okey, idx)) in slice.iter().enumerate() {
8433                if let Some(p) = prev_key
8434                    && order_key_cmp(p, okey) != core::cmp::Ordering::Equal
8435                {
8436                    current_rank = (i + 1) as i64;
8437                }
8438                if prev_key.is_none() {
8439                    current_rank = 1;
8440                }
8441                out_vals[*idx] = Value::BigInt(current_rank);
8442                prev_key = Some(okey.as_slice());
8443            }
8444            Ok(())
8445        }
8446        "dense_rank" => {
8447            let mut prev_key: Option<&[(Value, bool)]> = None;
8448            let mut current_rank: i64 = 0;
8449            for (_, okey, idx) in slice {
8450                if prev_key.is_none_or(|p| order_key_cmp(p, okey) != core::cmp::Ordering::Equal) {
8451                    current_rank += 1;
8452                }
8453                out_vals[*idx] = Value::BigInt(current_rank);
8454                prev_key = Some(okey.as_slice());
8455            }
8456            Ok(())
8457        }
8458        "sum" | "avg" | "min" | "max" | "count" | "count_star" => {
8459            // Pre-evaluate the function arg per row in the slice
8460            // (count_star has no arg).
8461            let arg_values: Vec<Value> = if lower == "count_star" || args.is_empty() {
8462                slice.iter().map(|_| Value::Null).collect()
8463            } else {
8464                slice
8465                    .iter()
8466                    .map(|(_, _, idx)| eval::eval_expr(&args[0], filtered_rows[*idx], ctx))
8467                    .collect::<Result<_, _>>()
8468                    .map_err(EngineError::Eval)?
8469            };
8470            // v4.20: pick the effective frame. Explicit frame
8471            // overrides the implicit default (running for ordered,
8472            // whole-partition for unordered).
8473            let eff = effective_frame(frame, ordered)?;
8474            #[allow(clippy::needless_range_loop)]
8475            for i in 0..slice.len() {
8476                let (lo, hi) = frame_bounds_for_row(&eff, i, slice);
8477                let mut sum: f64 = 0.0;
8478                let mut count: i64 = 0;
8479                let mut min_v: Option<f64> = None;
8480                let mut max_v: Option<f64> = None;
8481                let mut row_count: i64 = 0;
8482                if lo <= hi {
8483                    for j in lo..=hi {
8484                        let v = &arg_values[j];
8485                        match lower.as_str() {
8486                            "count_star" => row_count += 1,
8487                            "count" => {
8488                                if !v.is_null() {
8489                                    count += 1;
8490                                }
8491                            }
8492                            _ => {
8493                                if let Some(x) = value_to_f64(v) {
8494                                    sum += x;
8495                                    count += 1;
8496                                    min_v = Some(min_v.map_or(x, |m| m.min(x)));
8497                                    max_v = Some(max_v.map_or(x, |m| m.max(x)));
8498                                }
8499                            }
8500                        }
8501                    }
8502                }
8503                let value = match lower.as_str() {
8504                    "count_star" => Value::BigInt(row_count),
8505                    "count" => Value::BigInt(count),
8506                    "sum" => Value::Float(sum),
8507                    "avg" => {
8508                        if count == 0 {
8509                            Value::Null
8510                        } else {
8511                            Value::Float(sum / count as f64)
8512                        }
8513                    }
8514                    "min" => min_v.map_or(Value::Null, Value::Float),
8515                    "max" => max_v.map_or(Value::Null, Value::Float),
8516                    _ => unreachable!(),
8517                };
8518                let (_, _, idx) = &slice[i];
8519                out_vals[*idx] = value;
8520            }
8521            Ok(())
8522        }
8523        "lag" | "lead" => {
8524            // lag(expr [, offset [, default]])
8525            // lead(expr [, offset [, default]])
8526            if args.is_empty() {
8527                return Err(EngineError::Unsupported(alloc::format!(
8528                    "{lower}() requires at least one argument"
8529                )));
8530            }
8531            let offset: i64 = if args.len() >= 2 {
8532                let v = eval::eval_expr(&args[1], filtered_rows[slice[0].2], ctx)
8533                    .map_err(EngineError::Eval)?;
8534                match v {
8535                    Value::SmallInt(n) => i64::from(n),
8536                    Value::Int(n) => i64::from(n),
8537                    Value::BigInt(n) => n,
8538                    _ => {
8539                        return Err(EngineError::Unsupported(alloc::format!(
8540                            "{lower}() offset must be integer"
8541                        )));
8542                    }
8543                }
8544            } else {
8545                1
8546            };
8547            let default: Value = if args.len() >= 3 {
8548                eval::eval_expr(&args[2], filtered_rows[slice[0].2], ctx)
8549                    .map_err(EngineError::Eval)?
8550            } else {
8551                Value::Null
8552            };
8553            let values: Vec<Value> = slice
8554                .iter()
8555                .map(|(_, _, idx)| eval::eval_expr(&args[0], filtered_rows[*idx], ctx))
8556                .collect::<Result<_, _>>()
8557                .map_err(EngineError::Eval)?;
8558            let n = slice.len();
8559            for (i, (_, _, idx)) in slice.iter().enumerate() {
8560                let signed_offset = if lower == "lag" { -offset } else { offset };
8561                let v = if ignore_nulls {
8562                    // v6.4.2 — IGNORE NULLS: walk in the offset direction
8563                    // skipping NULL values; the `offset`-th non-NULL
8564                    // encountered is the result.
8565                    let step: i64 = if signed_offset >= 0 { 1 } else { -1 };
8566                    let needed: i64 = signed_offset.abs();
8567                    if needed == 0 {
8568                        values[i].clone()
8569                    } else {
8570                        let mut j: i64 = i as i64;
8571                        let mut hits: i64 = 0;
8572                        let mut found: Option<Value> = None;
8573                        loop {
8574                            j += step;
8575                            if j < 0 || j >= n as i64 {
8576                                break;
8577                            }
8578                            #[allow(clippy::cast_sign_loss)]
8579                            let v = &values[j as usize];
8580                            if !v.is_null() {
8581                                hits += 1;
8582                                if hits == needed {
8583                                    found = Some(v.clone());
8584                                    break;
8585                                }
8586                            }
8587                        }
8588                        found.unwrap_or_else(|| default.clone())
8589                    }
8590                } else {
8591                    let target_signed = i64::try_from(i).unwrap_or(i64::MAX) + signed_offset;
8592                    if target_signed < 0 || target_signed >= i64::try_from(n).unwrap_or(i64::MAX) {
8593                        default.clone()
8594                    } else {
8595                        #[allow(clippy::cast_sign_loss)]
8596                        {
8597                            values[target_signed as usize].clone()
8598                        }
8599                    }
8600                };
8601                out_vals[*idx] = v;
8602            }
8603            Ok(())
8604        }
8605        "first_value" | "last_value" | "nth_value" => {
8606            if args.is_empty() {
8607                return Err(EngineError::Unsupported(alloc::format!(
8608                    "{lower}() requires at least one argument"
8609                )));
8610            }
8611            let values: Vec<Value> = slice
8612                .iter()
8613                .map(|(_, _, idx)| eval::eval_expr(&args[0], filtered_rows[*idx], ctx))
8614                .collect::<Result<_, _>>()
8615                .map_err(EngineError::Eval)?;
8616            let nth: usize = if lower == "nth_value" {
8617                if args.len() < 2 {
8618                    return Err(EngineError::Unsupported(
8619                        "nth_value() requires (expr, n)".into(),
8620                    ));
8621                }
8622                let v = eval::eval_expr(&args[1], filtered_rows[slice[0].2], ctx)
8623                    .map_err(EngineError::Eval)?;
8624                let raw = match v {
8625                    Value::SmallInt(n) => i64::from(n),
8626                    Value::Int(n) => i64::from(n),
8627                    Value::BigInt(n) => n,
8628                    _ => {
8629                        return Err(EngineError::Unsupported(
8630                            "nth_value() n must be integer".into(),
8631                        ));
8632                    }
8633                };
8634                if raw < 1 {
8635                    return Err(EngineError::Unsupported(
8636                        "nth_value() n must be >= 1".into(),
8637                    ));
8638                }
8639                #[allow(clippy::cast_sign_loss)]
8640                {
8641                    raw as usize
8642                }
8643            } else {
8644                0
8645            };
8646            let eff = effective_frame(frame, ordered)?;
8647            for i in 0..slice.len() {
8648                let (lo, hi) = frame_bounds_for_row(&eff, i, slice);
8649                let (_, _, idx) = &slice[i];
8650                let v = if lo > hi {
8651                    Value::Null
8652                } else if ignore_nulls && matches!(lower.as_str(), "first_value" | "last_value") {
8653                    // v6.4.2 — IGNORE NULLS: skip NULL cells when
8654                    // selecting the boundary value within the frame.
8655                    if lower == "first_value" {
8656                        (lo..=hi)
8657                            .find_map(|j| {
8658                                let v = &values[j];
8659                                (!v.is_null()).then(|| v.clone())
8660                            })
8661                            .unwrap_or(Value::Null)
8662                    } else {
8663                        (lo..=hi)
8664                            .rev()
8665                            .find_map(|j| {
8666                                let v = &values[j];
8667                                (!v.is_null()).then(|| v.clone())
8668                            })
8669                            .unwrap_or(Value::Null)
8670                    }
8671                } else {
8672                    match lower.as_str() {
8673                        "first_value" => values[lo].clone(),
8674                        "last_value" => values[hi].clone(),
8675                        "nth_value" => {
8676                            let pos = lo + nth - 1;
8677                            if pos > hi {
8678                                Value::Null
8679                            } else {
8680                                values[pos].clone()
8681                            }
8682                        }
8683                        _ => unreachable!(),
8684                    }
8685                };
8686                out_vals[*idx] = v;
8687            }
8688            Ok(())
8689        }
8690        "ntile" => {
8691            if args.is_empty() {
8692                return Err(EngineError::Unsupported(
8693                    "ntile(n) requires an integer argument".into(),
8694                ));
8695            }
8696            let v = eval::eval_expr(&args[0], filtered_rows[slice[0].2], ctx)
8697                .map_err(EngineError::Eval)?;
8698            let bucket_count: i64 = match v {
8699                Value::SmallInt(n) => i64::from(n),
8700                Value::Int(n) => i64::from(n),
8701                Value::BigInt(n) => n,
8702                _ => {
8703                    return Err(EngineError::Unsupported(
8704                        "ntile() argument must be integer".into(),
8705                    ));
8706                }
8707            };
8708            if bucket_count < 1 {
8709                return Err(EngineError::Unsupported(
8710                    "ntile() argument must be >= 1".into(),
8711                ));
8712            }
8713            #[allow(clippy::cast_sign_loss)]
8714            let buckets = bucket_count as usize;
8715            let n = slice.len();
8716            // Each bucket gets `base` rows; the first `extras` buckets
8717            // get one extra. PG semantics.
8718            let base = n / buckets;
8719            let extras = n % buckets;
8720            let mut bucket: usize = 1;
8721            let mut remaining_in_bucket = if extras > 0 { base + 1 } else { base };
8722            let mut buckets_with_extra_remaining = extras;
8723            for (_, _, idx) in slice {
8724                if remaining_in_bucket == 0 {
8725                    bucket += 1;
8726                    buckets_with_extra_remaining = buckets_with_extra_remaining.saturating_sub(1);
8727                    remaining_in_bucket = if buckets_with_extra_remaining > 0 {
8728                        base + 1
8729                    } else {
8730                        base
8731                    };
8732                    // Edge: if base==0 and extras==0, all rows fit;
8733                    // shouldn't reach here, but guard anyway.
8734                    if remaining_in_bucket == 0 {
8735                        remaining_in_bucket = 1;
8736                    }
8737                }
8738                out_vals[*idx] = Value::BigInt(i64::try_from(bucket).unwrap_or(i64::MAX));
8739                remaining_in_bucket -= 1;
8740            }
8741            Ok(())
8742        }
8743        "percent_rank" => {
8744            // (rank - 1) / (n - 1) where rank is the standard RANK().
8745            // Single-row partitions get 0.
8746            let n = slice.len();
8747            let mut prev_key: Option<&[(Value, bool)]> = None;
8748            let mut current_rank: i64 = 1;
8749            for (i, (_, okey, idx)) in slice.iter().enumerate() {
8750                if let Some(p) = prev_key
8751                    && order_key_cmp(p, okey) != core::cmp::Ordering::Equal
8752                {
8753                    current_rank = i64::try_from(i + 1).unwrap_or(i64::MAX);
8754                }
8755                if prev_key.is_none() {
8756                    current_rank = 1;
8757                }
8758                #[allow(clippy::cast_precision_loss)]
8759                let pr = if n <= 1 {
8760                    0.0
8761                } else {
8762                    (current_rank - 1) as f64 / (n - 1) as f64
8763                };
8764                out_vals[*idx] = Value::Float(pr);
8765                prev_key = Some(okey.as_slice());
8766            }
8767            Ok(())
8768        }
8769        "cume_dist" => {
8770            // # rows up to and including this row's peer group / n.
8771            let n = slice.len();
8772            // First pass: find peer-group-end rank for each row.
8773            for i in 0..slice.len() {
8774                let peer_end = peer_group_end(slice, i);
8775                #[allow(clippy::cast_precision_loss)]
8776                let cd = (peer_end + 1) as f64 / n as f64;
8777                let (_, _, idx) = &slice[i];
8778                out_vals[*idx] = Value::Float(cd);
8779            }
8780            Ok(())
8781        }
8782        other => Err(EngineError::Unsupported(alloc::format!(
8783            "window function {other:?} not supported (v4.21: row_number/rank/dense_rank/sum/avg/count/min/max/lag/lead/first_value/last_value/nth_value/ntile/percent_rank/cume_dist)"
8784        ))),
8785    }
8786}
8787
8788/// v4.20: resolve the user-provided frame down to a normalised
8789/// `(kind, start, end)`. `None` means default — derive from
8790/// `ordered`: ordered ⇒ RANGE UNBOUNDED PRECEDING AND CURRENT ROW,
8791/// unordered ⇒ ROWS UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING.
8792/// Single-bound shorthand (e.g. `ROWS 5 PRECEDING`) normalises
8793/// end → CURRENT ROW per the PG spec.
8794fn effective_frame(
8795    frame: Option<&WindowFrame>,
8796    ordered: bool,
8797) -> Result<(FrameKind, FrameBound, FrameBound), EngineError> {
8798    match frame {
8799        None => {
8800            if ordered {
8801                Ok((
8802                    FrameKind::Range,
8803                    FrameBound::UnboundedPreceding,
8804                    FrameBound::CurrentRow,
8805                ))
8806            } else {
8807                Ok((
8808                    FrameKind::Rows,
8809                    FrameBound::UnboundedPreceding,
8810                    FrameBound::UnboundedFollowing,
8811                ))
8812            }
8813        }
8814        Some(fr) => {
8815            let end = fr.end.clone().unwrap_or(FrameBound::CurrentRow);
8816            // Reject start > end (a few impossible combinations).
8817            if matches!(fr.start, FrameBound::UnboundedFollowing)
8818                || matches!(end, FrameBound::UnboundedPreceding)
8819            {
8820                return Err(EngineError::Unsupported(alloc::format!(
8821                    "invalid frame: start={:?} end={:?}",
8822                    fr.start,
8823                    end
8824                )));
8825            }
8826            // RANGE OFFSET PRECEDING / FOLLOWING needs value-typed
8827            // arithmetic on the ORDER BY key (e.g. `RANGE BETWEEN
8828            // INTERVAL '1 day' PRECEDING AND CURRENT ROW`). Not
8829            // implemented in v4.20.
8830            if fr.kind == FrameKind::Range
8831                && (matches!(
8832                    fr.start,
8833                    FrameBound::OffsetPreceding(_) | FrameBound::OffsetFollowing(_)
8834                ) || matches!(
8835                    end,
8836                    FrameBound::OffsetPreceding(_) | FrameBound::OffsetFollowing(_)
8837                ))
8838            {
8839                return Err(EngineError::Unsupported(
8840                    "RANGE with explicit offset bounds is not supported (v4.20: only UNBOUNDED / CURRENT ROW for RANGE)".into(),
8841                ));
8842            }
8843            Ok((fr.kind, fr.start.clone(), end))
8844        }
8845    }
8846}
8847
8848/// Compute `(lo, hi)` row-index bounds inside the partition slice
8849/// for the row at position `i`. Inclusive, clamped to
8850/// `[0, slice.len()-1]`. Empty result if `lo > hi`.
8851#[allow(clippy::type_complexity)]
8852fn frame_bounds_for_row(
8853    eff: &(FrameKind, FrameBound, FrameBound),
8854    i: usize,
8855    slice: &[(Vec<Value>, Vec<(Value, bool)>, usize)],
8856) -> (usize, usize) {
8857    let (kind, start, end) = eff;
8858    let n = slice.len();
8859    let last = n.saturating_sub(1);
8860    let (mut lo, mut hi) = match kind {
8861        FrameKind::Rows => {
8862            let lo = match start {
8863                FrameBound::UnboundedPreceding => 0,
8864                FrameBound::OffsetPreceding(k) => {
8865                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
8866                    i.saturating_sub(k)
8867                }
8868                FrameBound::CurrentRow => i,
8869                FrameBound::OffsetFollowing(k) => {
8870                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
8871                    i.saturating_add(k).min(last)
8872                }
8873                FrameBound::UnboundedFollowing => last,
8874            };
8875            let hi = match end {
8876                FrameBound::UnboundedPreceding => 0,
8877                FrameBound::OffsetPreceding(k) => {
8878                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
8879                    i.saturating_sub(k)
8880                }
8881                FrameBound::CurrentRow => i,
8882                FrameBound::OffsetFollowing(k) => {
8883                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
8884                    i.saturating_add(k).min(last)
8885                }
8886                FrameBound::UnboundedFollowing => last,
8887            };
8888            (lo, hi)
8889        }
8890        FrameKind::Range => {
8891            // RANGE bounds are peer-aware. With only UNBOUNDED and
8892            // CURRENT ROW supported (rejected at effective_frame for
8893            // explicit offsets), the start/end map to the
8894            // partition's full extent at the same-order-key peer
8895            // group boundary.
8896            let lo = match start {
8897                FrameBound::UnboundedPreceding => 0,
8898                FrameBound::CurrentRow => peer_group_start(slice, i),
8899                FrameBound::UnboundedFollowing => last,
8900                _ => unreachable!("offset bounds rejected for RANGE"),
8901            };
8902            let hi = match end {
8903                FrameBound::UnboundedPreceding => 0,
8904                FrameBound::CurrentRow => peer_group_end(slice, i),
8905                FrameBound::UnboundedFollowing => last,
8906                _ => unreachable!("offset bounds rejected for RANGE"),
8907            };
8908            (lo, hi)
8909        }
8910    };
8911    if hi >= n {
8912        hi = last;
8913    }
8914    if lo >= n {
8915        lo = last;
8916    }
8917    (lo, hi)
8918}
8919
8920/// Find the inclusive index of the first row with the same ORDER
8921/// BY key as `slice[i]`. Slice is already sorted by partition then
8922/// order, so peers are contiguous.
8923#[allow(clippy::type_complexity)]
8924fn peer_group_start(slice: &[(Vec<Value>, Vec<(Value, bool)>, usize)], i: usize) -> usize {
8925    let key = &slice[i].1;
8926    let mut j = i;
8927    while j > 0 && order_key_cmp(&slice[j - 1].1, key) == core::cmp::Ordering::Equal {
8928        j -= 1;
8929    }
8930    j
8931}
8932
8933/// Find the inclusive index of the last row with the same ORDER
8934/// BY key as `slice[i]`.
8935#[allow(clippy::type_complexity)]
8936fn peer_group_end(slice: &[(Vec<Value>, Vec<(Value, bool)>, usize)], i: usize) -> usize {
8937    let key = &slice[i].1;
8938    let mut j = i;
8939    while j + 1 < slice.len() && order_key_cmp(&slice[j + 1].1, key) == core::cmp::Ordering::Equal {
8940        j += 1;
8941    }
8942    j
8943}
8944
8945fn value_to_f64(v: &Value) -> Option<f64> {
8946    match v {
8947        Value::SmallInt(n) => Some(f64::from(*n)),
8948        Value::Int(n) => Some(f64::from(*n)),
8949        #[allow(clippy::cast_precision_loss)]
8950        Value::BigInt(n) => Some(*n as f64),
8951        Value::Float(x) => Some(*x),
8952        _ => None,
8953    }
8954}
8955
8956/// Quick scan for any subquery-bearing node in a SELECT's WHERE /
8957/// projection / `order_by` — saves cloning the AST when there are
8958/// none (the common case).
8959fn expr_tree_has_subquery(stmt: &SelectStatement) -> bool {
8960    let mut any = false;
8961    for item in &stmt.items {
8962        if let SelectItem::Expr { expr, .. } = item {
8963            any = any || expr_has_subquery(expr);
8964        }
8965    }
8966    if let Some(w) = &stmt.where_ {
8967        any = any || expr_has_subquery(w);
8968    }
8969    if let Some(h) = &stmt.having {
8970        any = any || expr_has_subquery(h);
8971    }
8972    for o in &stmt.order_by {
8973        any = any || expr_has_subquery(&o.expr);
8974    }
8975    for (_, peer) in &stmt.unions {
8976        any = any || expr_tree_has_subquery(peer);
8977    }
8978    any
8979}
8980
8981fn expr_has_subquery(e: &Expr) -> bool {
8982    match e {
8983        Expr::ScalarSubquery(_) | Expr::Exists { .. } | Expr::InSubquery { .. } => true,
8984        Expr::Binary { lhs, rhs, .. } => expr_has_subquery(lhs) || expr_has_subquery(rhs),
8985        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
8986            expr_has_subquery(expr)
8987        }
8988        Expr::FunctionCall { args, .. } => args.iter().any(expr_has_subquery),
8989        Expr::Like { expr, pattern, .. } => expr_has_subquery(expr) || expr_has_subquery(pattern),
8990        Expr::Extract { source, .. } => expr_has_subquery(source),
8991        Expr::WindowFunction {
8992            args,
8993            partition_by,
8994            order_by,
8995            ..
8996        } => {
8997            args.iter().any(expr_has_subquery)
8998                || partition_by.iter().any(expr_has_subquery)
8999                || order_by.iter().any(|(e, _)| expr_has_subquery(e))
9000        }
9001        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => false,
9002        Expr::Array(items) => items.iter().any(expr_has_subquery),
9003        Expr::ArraySubscript { target, index } => {
9004            expr_has_subquery(target) || expr_has_subquery(index)
9005        }
9006        Expr::AnyAll { expr, array, .. } => expr_has_subquery(expr) || expr_has_subquery(array),
9007        Expr::Case {
9008            operand,
9009            branches,
9010            else_branch,
9011        } => {
9012            operand.as_deref().is_some_and(expr_has_subquery)
9013                || branches
9014                    .iter()
9015                    .any(|(w, t)| expr_has_subquery(w) || expr_has_subquery(t))
9016                || else_branch.as_deref().is_some_and(expr_has_subquery)
9017        }
9018    }
9019}
9020
9021/// v4.10 helper: materialise a runtime `Value` back into an AST
9022/// `Expr::Literal` for the subquery-rewrite path. Supports the
9023/// types `Literal` can represent (Integer / Float / Text / Bool /
9024/// Null). Date / Timestamp / Numeric / Vector / Interval / JSON
9025/// would lose precision through Literal and aren't supported in
9026/// uncorrelated-subquery results; they error with a clear hint.
9027fn value_to_literal_expr(v: Value) -> Result<Expr, EngineError> {
9028    let lit = match v {
9029        Value::Null => Literal::Null,
9030        Value::SmallInt(n) => Literal::Integer(i64::from(n)),
9031        Value::Int(n) => Literal::Integer(i64::from(n)),
9032        Value::BigInt(n) => Literal::Integer(n),
9033        Value::Float(x) => Literal::Float(x),
9034        Value::Text(s) | Value::Json(s) => Literal::String(s),
9035        Value::Bool(b) => Literal::Bool(b),
9036        other => {
9037            return Err(EngineError::Unsupported(alloc::format!(
9038                "subquery result type {:?} not yet materialisable; cast to text or integer in the inner SELECT",
9039                other.data_type()
9040            )));
9041        }
9042    };
9043    Ok(Expr::Literal(lit))
9044}
9045
9046/// v7.13.0 — wider helper used by `INSERT … SELECT` (mailrs
9047/// round-5 G4). Covers the most common `Value` variants. Types
9048/// that need lossy textual round-trip (BYTEA, arrays, ts*)
9049/// surface as an Unsupported error so the caller can add a cast
9050/// in the inner SELECT.
9051fn value_to_literal_expr_permissive(v: Value) -> Result<Expr, EngineError> {
9052    let lit = match v {
9053        Value::Null => Literal::Null,
9054        Value::SmallInt(n) => Literal::Integer(i64::from(n)),
9055        Value::Int(n) => Literal::Integer(i64::from(n)),
9056        Value::BigInt(n) => Literal::Integer(n),
9057        Value::Float(x) => Literal::Float(x),
9058        Value::Text(s) | Value::Json(s) => Literal::String(s),
9059        Value::Bool(b) => Literal::Bool(b),
9060        Value::Vector(xs) => Literal::Vector(xs),
9061        // Date / Timestamp / Timestamptz / Numeric round-trip
9062        // through a TEXT literal that `coerce_value` re-parses
9063        // against the target column type.
9064        Value::Date(days) => {
9065            let micros = (i64::from(days)) * 86_400_000_000;
9066            Literal::String(format_timestamp_micros_as_date(micros))
9067        }
9068        Value::Timestamp(us) => Literal::String(format_timestamp_micros(us)),
9069        Value::Numeric { scaled, scale } => {
9070            Literal::String(format_numeric(scaled, scale))
9071        }
9072        other => {
9073            return Err(EngineError::Unsupported(alloc::format!(
9074                "INSERT … SELECT cannot materialise value of type {:?}; \
9075                 add an explicit CAST in the inner SELECT",
9076                other.data_type()
9077            )));
9078        }
9079    };
9080    Ok(Expr::Literal(lit))
9081}
9082
9083fn format_timestamp_micros(us: i64) -> String {
9084    // Same Y/M/D split used by the wire layer; epoch-relative.
9085    let days = us.div_euclid(86_400_000_000);
9086    let intra_day = us.rem_euclid(86_400_000_000);
9087    let date = format_timestamp_micros_as_date(days * 86_400_000_000);
9088    let secs = intra_day / 1_000_000;
9089    let us_rem = intra_day % 1_000_000;
9090    let h = (secs / 3600) % 24;
9091    let m = (secs / 60) % 60;
9092    let s = secs % 60;
9093    if us_rem == 0 {
9094        alloc::format!("{date} {h:02}:{m:02}:{s:02}")
9095    } else {
9096        alloc::format!("{date} {h:02}:{m:02}:{s:02}.{us_rem:06}")
9097    }
9098}
9099
9100fn format_timestamp_micros_as_date(us: i64) -> String {
9101    // Days since 1970-01-01 → calendar Y-M-D via the proleptic
9102    // Gregorian conversion used by spg-engine's date helpers.
9103    let days = us.div_euclid(86_400_000_000);
9104    // 1970-01-01 = JDN 2440588.
9105    let jdn = days + 2_440_588;
9106    let (y, mo, d) = jdn_to_ymd(jdn);
9107    alloc::format!("{y:04}-{mo:02}-{d:02}")
9108}
9109
/// Converts a Julian day number into a `(year, month, day)` triple
/// using the Fliegel & Van Flandern (1968) integer recurrence.
///
/// Works for all positive JDNs; the truncating divisions below are
/// not adjusted for negative input.
fn jdn_to_ymd(jdn: i64) -> (i64, u32, u32) {
    // Each step peels one calendar unit off the running remainder.
    let shifted = jdn + 68569;
    let century = (4 * shifted) / 146_097;
    let in_century = shifted - (146_097 * century + 3) / 4;
    let year_of_century = (4000 * (in_century + 1)) / 1_461_001;
    let in_year = in_century - (1461 * year_of_century) / 4 + 31;
    let month_index = (80 * in_year) / 2447;
    let day = (in_year - (2447 * month_index) / 80) as u32;
    // `wrap` is 1 when the month index runs past December, else 0.
    let wrap = month_index / 11;
    let month = (month_index + 2 - 12 * wrap) as u32;
    let year = 100 * (century - 49) + year_of_century + wrap;
    (year, month, day)
}
9124
/// Renders a fixed-point value (`scaled` × 10^-`scale`) as decimal
/// text.
///
/// With `scale == 0` the integer is printed as-is. Otherwise the
/// output is `[-]<whole>.<frac>`, with the fraction zero-padded to
/// exactly `scale` digits.
///
/// `10^scale` stops fitting in `u128` at `scale >= 39`. No `i128`
/// magnitude reaches that divisor, so in that case the whole part is
/// 0 and the entire magnitude is the fraction. This avoids the
/// overflow a plain `pow` would hit.
fn format_numeric(scaled: i128, scale: u8) -> String {
    if scale == 0 {
        return alloc::format!("{scaled}");
    }
    // `unsigned_abs` is used so `i128::MIN` has a representable magnitude.
    let abs = scaled.unsigned_abs();
    let (whole, frac) = match 10u128.checked_pow(u32::from(scale)) {
        Some(divisor) => (abs / divisor, abs % divisor),
        None => (0, abs),
    };
    let sign = if scaled < 0 { "-" } else { "" };
    alloc::format!(
        "{sign}{whole}.{frac:0width$}",
        width = usize::from(scale)
    )
}
9139
9140/// v6.1.1 — walk the prepared `Statement` AST and replace every
9141/// `Expr::Placeholder(n)` with `Expr::Literal(value_to_literal(
9142/// params[n-1]))`. The dispatch downstream sees a `Statement`
9143/// indistinguishable from a simple-query parse, so the exec path
9144/// stays unchanged.
9145///
9146/// Errors fall into one shape: a `$N` references past the bound
9147/// `params.len()`. Out-of-range happens when the Bind didn't
9148/// supply enough values; pgwire surfaces this as a protocol error
9149/// to the client.
9150/// v7.15.0 — rewrite every (potentially-qualified) column
9151/// identifier matching `old` to `new` in a stored SQL source
9152/// string. Used by `ALTER TABLE … RENAME COLUMN` to patch
9153/// CHECK predicate sources, partial-index predicate sources,
9154/// and runtime DEFAULT expression sources before they get
9155/// re-parsed on the next INSERT/UPDATE.
9156///
9157/// Round-trips through the parser, so the rewritten output is
9158/// the canonical Display form (matches what the engine stores
9159/// for fresh predicates). If the source doesn't parse, surfaces
9160/// the parse error — the invariant that stored predicates are
9161/// in canonical Display form means a parse failure here is a
9162/// real bug, not a user mistake to swallow.
9163fn rewrite_column_in_source(
9164    src: &str,
9165    old: &str,
9166    new: &str,
9167) -> Result<alloc::string::String, EngineError> {
9168    let mut expr = spg_sql::parser::parse_expression(src).map_err(|e| {
9169        EngineError::Unsupported(alloc::format!(
9170            "ALTER TABLE RENAME COLUMN: stored predicate source {src:?} \
9171             failed to parse for rewrite ({e})"
9172        ))
9173    })?;
9174    rewrite_column_in_expr(&mut expr, old, new);
9175    Ok(alloc::format!("{expr}"))
9176}
9177
9178/// v7.15.0 — Expr walker that swaps `Expr::Column { name: old, .. }`
9179/// for `Expr::Column { name: new, .. }`. Qualifier is preserved
9180/// (e.g. `t.old` → `t.new`); a foreign-table qualifier still
9181/// gets rewritten because the AST has no way to tell us this
9182/// predicate is on table T versus table T2 — predicate sources
9183/// in SPG are always scoped to the owning table, so any
9184/// qualifier present is either redundant or wrong.
9185fn rewrite_column_in_expr(e: &mut Expr, old: &str, new: &str) {
9186    match e {
9187        Expr::Column(c) => {
9188            if c.name.eq_ignore_ascii_case(old) {
9189                c.name = new.to_string();
9190            }
9191        }
9192        Expr::Binary { lhs, rhs, .. } => {
9193            rewrite_column_in_expr(lhs, old, new);
9194            rewrite_column_in_expr(rhs, old, new);
9195        }
9196        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
9197            rewrite_column_in_expr(expr, old, new);
9198        }
9199        Expr::FunctionCall { args, .. } => {
9200            for a in args {
9201                rewrite_column_in_expr(a, old, new);
9202            }
9203        }
9204        Expr::Like { expr, pattern, .. } => {
9205            rewrite_column_in_expr(expr, old, new);
9206            rewrite_column_in_expr(pattern, old, new);
9207        }
9208        Expr::Extract { source, .. } => rewrite_column_in_expr(source, old, new),
9209        Expr::WindowFunction {
9210            args,
9211            partition_by,
9212            order_by,
9213            ..
9214        } => {
9215            for a in args {
9216                rewrite_column_in_expr(a, old, new);
9217            }
9218            for p in partition_by {
9219                rewrite_column_in_expr(p, old, new);
9220            }
9221            for (o, _) in order_by {
9222                rewrite_column_in_expr(o, old, new);
9223            }
9224        }
9225        Expr::Array(items) => {
9226            for elem in items {
9227                rewrite_column_in_expr(elem, old, new);
9228            }
9229        }
9230        Expr::ArraySubscript { target, index } => {
9231            rewrite_column_in_expr(target, old, new);
9232            rewrite_column_in_expr(index, old, new);
9233        }
9234        Expr::AnyAll { expr, array, .. } => {
9235            rewrite_column_in_expr(expr, old, new);
9236            rewrite_column_in_expr(array, old, new);
9237        }
9238        Expr::Case {
9239            operand,
9240            branches,
9241            else_branch,
9242        } => {
9243            if let Some(o) = operand {
9244                rewrite_column_in_expr(o, old, new);
9245            }
9246            for (w, t) in branches {
9247                rewrite_column_in_expr(w, old, new);
9248                rewrite_column_in_expr(t, old, new);
9249            }
9250            if let Some(e) = else_branch {
9251                rewrite_column_in_expr(e, old, new);
9252            }
9253        }
9254        // Stored predicate sources never contain subqueries —
9255        // CHECK / partial-index / runtime_default are all scalar.
9256        // If a future feature changes that, recurse here.
9257        Expr::ScalarSubquery(_) | Expr::Exists { .. } | Expr::InSubquery { .. } => {}
9258        Expr::Literal(_) | Expr::Placeholder(_) => {}
9259    }
9260}
9261
9262/// v7.16.0 — walks a parsed statement and replaces every
9263/// `Expr::Placeholder(N)` with the corresponding `params[N-1]`
9264/// re-encoded as an `Expr::Literal`. Used internally by
9265/// `Engine::execute_prepared` AND surfaced for the spg-embedded
9266/// WAL path (which needs the bind-final AST so replay sees a
9267/// simple-query-shaped statement, not a `$1`-shaped one). Errors
9268/// when a placeholder references an index past the params slice.
9269pub fn substitute_placeholders(stmt: &mut Statement, params: &[Value]) -> Result<(), EngineError> {
9270    match stmt {
9271        Statement::Select(s) => substitute_select(s, params)?,
9272        Statement::Insert(ins) => {
9273            for row in &mut ins.rows {
9274                for e in row {
9275                    substitute_expr(e, params)?;
9276                }
9277            }
9278        }
9279        Statement::Update(u) => {
9280            for (_, e) in &mut u.assignments {
9281                substitute_expr(e, params)?;
9282            }
9283            if let Some(w) = &mut u.where_ {
9284                substitute_expr(w, params)?;
9285            }
9286        }
9287        Statement::Delete(d) => {
9288            if let Some(w) = &mut d.where_ {
9289                substitute_expr(w, params)?;
9290            }
9291        }
9292        Statement::Explain(e) => substitute_select(&mut e.inner, params)?,
9293        // Other statements (CREATE / BEGIN / SHOW / …) have no
9294        // expression slots; no walk needed.
9295        _ => {}
9296    }
9297    Ok(())
9298}
9299
9300fn substitute_select(s: &mut SelectStatement, params: &[Value]) -> Result<(), EngineError> {
9301    for item in &mut s.items {
9302        if let SelectItem::Expr { expr, .. } = item {
9303            substitute_expr(expr, params)?;
9304        }
9305    }
9306    if let Some(w) = &mut s.where_ {
9307        substitute_expr(w, params)?;
9308    }
9309    if let Some(gs) = &mut s.group_by {
9310        for g in gs {
9311            substitute_expr(g, params)?;
9312        }
9313    }
9314    if let Some(h) = &mut s.having {
9315        substitute_expr(h, params)?;
9316    }
9317    for o in &mut s.order_by {
9318        substitute_expr(&mut o.expr, params)?;
9319    }
9320    for (_, peer) in &mut s.unions {
9321        substitute_select(peer, params)?;
9322    }
9323    // v7.9.24 — LIMIT $N / OFFSET $N placeholder resolution.
9324    // mailrs H2. After this pass each LIMIT/OFFSET that was a
9325    // Placeholder is rewritten to Literal so the existing
9326    // `LimitExpr::as_literal` path consumes a concrete u32.
9327    if let Some(le) = s.limit {
9328        s.limit = Some(resolve_limit_placeholder(le, params)?);
9329    }
9330    if let Some(le) = s.offset {
9331        s.offset = Some(resolve_limit_placeholder(le, params)?);
9332    }
9333    Ok(())
9334}
9335
9336fn resolve_limit_placeholder(
9337    le: spg_sql::ast::LimitExpr,
9338    params: &[Value],
9339) -> Result<spg_sql::ast::LimitExpr, EngineError> {
9340    use spg_sql::ast::LimitExpr;
9341    match le {
9342        LimitExpr::Literal(_) => Ok(le),
9343        LimitExpr::Placeholder(n) => {
9344            let idx = usize::from(n).saturating_sub(1);
9345            let v = params.get(idx).ok_or_else(|| {
9346                EngineError::Eval(EvalError::PlaceholderOutOfRange {
9347                    n,
9348                    bound: u16::try_from(params.len()).unwrap_or(u16::MAX),
9349                })
9350            })?;
9351            let int = match v {
9352                Value::SmallInt(x) => Some(i64::from(*x)),
9353                Value::Int(x) => Some(i64::from(*x)),
9354                Value::BigInt(x) => Some(*x),
9355                _ => None,
9356            }
9357            .ok_or_else(|| {
9358                EngineError::Unsupported(alloc::format!(
9359                    "LIMIT/OFFSET ${n} bound to non-integer {v:?}"
9360                ))
9361            })?;
9362            if int < 0 {
9363                return Err(EngineError::Unsupported(alloc::format!(
9364                    "LIMIT/OFFSET ${n} bound to negative value {int}"
9365                )));
9366            }
9367            let bounded = u32::try_from(int).map_err(|_| {
9368                EngineError::Unsupported(alloc::format!(
9369                    "LIMIT/OFFSET ${n} value {int} exceeds u32 range"
9370                ))
9371            })?;
9372            Ok(LimitExpr::Literal(bounded))
9373        }
9374    }
9375}
9376
9377fn substitute_expr(e: &mut Expr, params: &[Value]) -> Result<(), EngineError> {
9378    if let Expr::Placeholder(n) = e {
9379        let idx = usize::from(*n).saturating_sub(1);
9380        let v = params.get(idx).ok_or_else(|| {
9381            EngineError::Eval(EvalError::PlaceholderOutOfRange {
9382                n: *n,
9383                bound: u16::try_from(params.len()).unwrap_or(u16::MAX),
9384            })
9385        })?;
9386        *e = Expr::Literal(value_to_literal(v.clone()));
9387        return Ok(());
9388    }
9389    match e {
9390        Expr::Binary { lhs, rhs, .. } => {
9391            substitute_expr(lhs, params)?;
9392            substitute_expr(rhs, params)?;
9393        }
9394        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
9395            substitute_expr(expr, params)?;
9396        }
9397        Expr::FunctionCall { args, .. } => {
9398            for a in args {
9399                substitute_expr(a, params)?;
9400            }
9401        }
9402        Expr::Like { expr, pattern, .. } => {
9403            substitute_expr(expr, params)?;
9404            substitute_expr(pattern, params)?;
9405        }
9406        Expr::Extract { source, .. } => substitute_expr(source, params)?,
9407        Expr::ScalarSubquery(s) => substitute_select(s, params)?,
9408        Expr::Exists { subquery, .. } => substitute_select(subquery, params)?,
9409        Expr::InSubquery { expr, subquery, .. } => {
9410            substitute_expr(expr, params)?;
9411            substitute_select(subquery, params)?;
9412        }
9413        Expr::WindowFunction {
9414            args,
9415            partition_by,
9416            order_by,
9417            ..
9418        } => {
9419            for a in args {
9420                substitute_expr(a, params)?;
9421            }
9422            for p in partition_by {
9423                substitute_expr(p, params)?;
9424            }
9425            for (e, _) in order_by {
9426                substitute_expr(e, params)?;
9427            }
9428        }
9429        Expr::Literal(_) | Expr::Column(_) => {}
9430        // Already handled above.
9431        Expr::Placeholder(_) => unreachable!("Placeholder handled at top of fn"),
9432        Expr::Array(items) => {
9433            for elem in items {
9434                substitute_expr(elem, params)?;
9435            }
9436        }
9437        Expr::ArraySubscript { target, index } => {
9438            substitute_expr(target, params)?;
9439            substitute_expr(index, params)?;
9440        }
9441        Expr::AnyAll { expr, array, .. } => {
9442            substitute_expr(expr, params)?;
9443            substitute_expr(array, params)?;
9444        }
9445        Expr::Case {
9446            operand,
9447            branches,
9448            else_branch,
9449        } => {
9450            if let Some(o) = operand {
9451                substitute_expr(o, params)?;
9452            }
9453            for (w, t) in branches {
9454                substitute_expr(w, params)?;
9455                substitute_expr(t, params)?;
9456            }
9457            if let Some(e) = else_branch {
9458                substitute_expr(e, params)?;
9459            }
9460        }
9461    }
9462    Ok(())
9463}
9464
9465/// v6.1.1 — convert a runtime `Value` into the closest matching
9466/// `Literal` for the substitute walker. Lossless for the simple
9467/// scalars (Int / Float / Text / Bool); Numeric / Date / Timestamp
9468/// / Json / Interval render as their canonical text form so the
9469/// downstream coerce_value can re-parse against the target column
9470/// type. SQ8 / HalfVector cells are NOT expected as bind params;
9471/// pgwire's Bind decodes vector params to the f32 representation
9472/// before they reach this helper.
9473/// v6.2.0 — total ordering on `Value`s used by ANALYZE to sort a
9474/// column's non-NULL sample before histogram building. Cross-type
9475/// pairs (Int vs Float, Date vs Timestamp, …) compare via the
9476/// same widening the eval-side `compare` operator uses; everything
9477/// else (the genuinely-incompatible pairs) falls back to ordering
9478/// by canonical string form so the sort is still total + stable.
9479/// Vector / SQ8 / Half / Json / Numeric / Interval values reach
9480/// here only via the string-fallback path because vector columns
9481/// are filtered out upstream.
9482fn sort_values_for_histogram(a: &Value, b: &Value) -> core::cmp::Ordering {
9483    use core::cmp::Ordering;
9484    match (a, b) {
9485        (Value::SmallInt(a), Value::SmallInt(b)) => a.cmp(b),
9486        (Value::Int(a), Value::Int(b)) => a.cmp(b),
9487        (Value::BigInt(a), Value::BigInt(b)) => a.cmp(b),
9488        (Value::SmallInt(a), Value::Int(b)) => i32::from(*a).cmp(b),
9489        (Value::Int(a), Value::SmallInt(b)) => a.cmp(&i32::from(*b)),
9490        (Value::Int(a), Value::BigInt(b)) => i64::from(*a).cmp(b),
9491        (Value::BigInt(a), Value::Int(b)) => a.cmp(&i64::from(*b)),
9492        (Value::SmallInt(a), Value::BigInt(b)) => i64::from(*a).cmp(b),
9493        (Value::BigInt(a), Value::SmallInt(b)) => a.cmp(&i64::from(*b)),
9494        (Value::Float(a), Value::Float(b)) => a.partial_cmp(b).unwrap_or(Ordering::Equal),
9495        (Value::Text(a), Value::Text(b)) | (Value::Json(a), Value::Json(b)) => a.cmp(b),
9496        (Value::Bool(a), Value::Bool(b)) => a.cmp(b),
9497        (Value::Date(a), Value::Date(b)) => a.cmp(b),
9498        (Value::Timestamp(a), Value::Timestamp(b)) => a.cmp(b),
9499        // Mixed numeric/float — widen to f64 and compare.
9500        (Value::SmallInt(n), Value::Float(x)) => {
9501            (f64::from(*n)).partial_cmp(x).unwrap_or(Ordering::Equal)
9502        }
9503        (Value::Float(x), Value::SmallInt(n)) => {
9504            x.partial_cmp(&f64::from(*n)).unwrap_or(Ordering::Equal)
9505        }
9506        (Value::Int(n), Value::Float(x)) => {
9507            (f64::from(*n)).partial_cmp(x).unwrap_or(Ordering::Equal)
9508        }
9509        (Value::Float(x), Value::Int(n)) => {
9510            x.partial_cmp(&f64::from(*n)).unwrap_or(Ordering::Equal)
9511        }
9512        (Value::BigInt(n), Value::Float(x)) => {
9513            #[allow(clippy::cast_precision_loss)]
9514            let nf = *n as f64;
9515            nf.partial_cmp(x).unwrap_or(Ordering::Equal)
9516        }
9517        (Value::Float(x), Value::BigInt(n)) => {
9518            #[allow(clippy::cast_precision_loss)]
9519            let nf = *n as f64;
9520            x.partial_cmp(&nf).unwrap_or(Ordering::Equal)
9521        }
9522        // Cross-type fallback: lexicographic on canonical form.
9523        // Total + stable so the sort is well-defined.
9524        _ => canonical_value_repr(a).cmp(&canonical_value_repr(b)),
9525    }
9526}
9527
/// v6.2.0 — render the histogram bounds list as a `[v0, v1, ...]`
/// string for the `spg_statistic.histogram_bounds` column.
///
/// A bound is emitted verbatim unless it is empty or contains `,`,
/// `[`, `]` or `"`. In that case it is wrapped in double quotes,
/// with every `"` and `\` inside it preceded by a backslash. v6.2.0
/// only uses the rendered form for human consumption, so the
/// escaping is conservative.
fn render_histogram_bounds(bounds: &[alloc::string::String]) -> alloc::string::String {
    let mut rendered = alloc::string::String::with_capacity(bounds.len() * 8 + 2);
    rendered.push('[');
    let mut first = true;
    for bound in bounds {
        if !first {
            rendered.push_str(", ");
        }
        first = false;

        let plain = !bound.is_empty() && !bound.contains([',', '[', ']', '"']);
        if plain {
            rendered.push_str(bound);
            continue;
        }

        rendered.push('"');
        for ch in bound.chars() {
            if matches!(ch, '"' | '\\') {
                rendered.push('\\');
            }
            rendered.push(ch);
        }
        rendered.push('"');
    }
    rendered.push(']');
    rendered
}
9558
9559/// v6.2.0 — canonical textual form of a `Value` for histogram
9560/// bound storage. Strings used by ANALYZE for sort + bound output.
9561/// INT / BIGINT → decimal; FLOAT → shortest-round-trip via
9562/// `{:?}`; TEXT pass-through; BOOL → `t` / `f`; DATE / TIMESTAMP →
9563/// the same form `format_date` / `format_timestamp` produce for
9564/// SQL Display. Vector / SQ8 / Half / Json / Numeric / Interval
9565/// reach this only via a non-Vector column (vector columns are
9566/// skipped upstream); they fall back to a Debug-derived form so
9567/// stats still serialise without crashing.
9568pub(crate) fn canonical_value_repr(v: &Value) -> alloc::string::String {
9569    match v {
9570        Value::Null => "NULL".to_string(),
9571        Value::SmallInt(n) => alloc::format!("{n}"),
9572        Value::Int(n) => alloc::format!("{n}"),
9573        Value::BigInt(n) => alloc::format!("{n}"),
9574        Value::Float(x) => alloc::format!("{x:?}"),
9575        Value::Text(s) | Value::Json(s) => s.clone(),
9576        Value::Bool(b) => if *b { "t" } else { "f" }.to_string(),
9577        Value::Date(d) => eval::format_date(*d),
9578        Value::Timestamp(t) => eval::format_timestamp(*t),
9579        Value::Interval { months, micros } => eval::format_interval(*months, *micros),
9580        Value::Numeric { scaled, scale } => eval::format_numeric(*scaled, *scale),
9581        Value::Vector(_) | Value::Sq8Vector(_) | Value::HalfVector(_) => {
9582            // Unreachable in practice (vector columns are filtered
9583            // out before this). Defensive fallback so a future
9584            // vector-stats path doesn't crash.
9585            alloc::format!("{v:?}")
9586        }
9587        // v7.5.0 — Value is #[non_exhaustive] for downstream
9588        // forward-compat. Future variants fall through to Debug
9589        // form here (same shape as the vector fallback above).
9590        _ => alloc::format!("{v:?}"),
9591    }
9592}
9593
/// v6.2.0 — reports whether `_name` is an engine-managed catalog
/// table that a bare `ANALYZE` (no target) should skip.
///
/// Always `false` today: publications, subscriptions, users and
/// statistics all live as engine fields and not as catalog tables,
/// so every existing table is a user table and gets analysed. The
/// function is a reserved hook for when internal tables appear.
const fn is_internal_table_name(_name: &str) -> bool {
    false
}
9603
9604fn value_to_literal(v: Value) -> Literal {
9605    match v {
9606        Value::Null => Literal::Null,
9607        Value::SmallInt(n) => Literal::Integer(i64::from(n)),
9608        Value::Int(n) => Literal::Integer(i64::from(n)),
9609        Value::BigInt(n) => Literal::Integer(n),
9610        Value::Float(x) => Literal::Float(x),
9611        Value::Text(s) | Value::Json(s) => Literal::String(s),
9612        Value::Bool(b) => Literal::Bool(b),
9613        Value::Vector(v) => Literal::Vector(v),
9614        Value::Numeric { scaled, scale } => Literal::String(eval::format_numeric(scaled, scale)),
9615        Value::Date(d) => Literal::String(eval::format_date(d)),
9616        Value::Timestamp(t) => Literal::String(eval::format_timestamp(t)),
9617        // v7.16.0 — BYTEA round-trip for the spg-sqlx Bind path.
9618        // PG-canonical text rep is `\x` + lowercase hex; the
9619        // engine's coerce_value already accepts that on the
9620        // text → bytea direction.
9621        Value::Bytes(b) => Literal::String(eval::format_bytea_hex(&b)),
9622        // v7.16.0 — array round-trip for the spg-sqlx Bind
9623        // path. Render as PG external form `{a,b,c}`; the
9624        // engine's text → array coerce (just below in
9625        // coerce_value) accepts it on the matching column type.
9626        Value::TextArray(items) => Literal::String(eval::format_text_array(&items)),
9627        Value::IntArray(items) => Literal::String(eval::format_int_array(&items)),
9628        Value::BigIntArray(items) => Literal::String(eval::format_bigint_array(&items)),
9629        Value::Interval { months, micros } => Literal::Interval {
9630            months,
9631            micros,
9632            text: eval::format_interval(months, micros),
9633        },
9634        // SQ8 / halfvec cells dequantise to f32 before reaching the
9635        // substitute walker; pgwire's Bind path handles that.
9636        Value::Sq8Vector(q) => Literal::Vector(spg_storage::quantize::dequantize(&q)),
9637        Value::HalfVector(h) => Literal::Vector(h.to_f32_vec()),
9638        // v7.5.0 — Value is #[non_exhaustive]; future variants
9639        // render as Debug-form String literal until explicit
9640        // mapping is added.
9641        v => Literal::String(alloc::format!("{v:?}")),
9642    }
9643}
9644
9645fn rewrite_clock_calls(stmt: &mut Statement, now_micros: Option<i64>) {
9646    let Some(now) = now_micros else {
9647        return;
9648    };
9649    match stmt {
9650        Statement::Select(s) => rewrite_select_clock(s, now),
9651        Statement::Insert(ins) => {
9652            for row in &mut ins.rows {
9653                for e in row {
9654                    rewrite_expr_clock(e, now);
9655                }
9656            }
9657        }
9658        _ => {}
9659    }
9660}
9661
9662fn rewrite_select_clock(s: &mut SelectStatement, now: i64) {
9663    for item in &mut s.items {
9664        if let SelectItem::Expr { expr, .. } = item {
9665            rewrite_expr_clock(expr, now);
9666        }
9667    }
9668    if let Some(w) = &mut s.where_ {
9669        rewrite_expr_clock(w, now);
9670    }
9671    if let Some(gs) = &mut s.group_by {
9672        for g in gs {
9673            rewrite_expr_clock(g, now);
9674        }
9675    }
9676    if let Some(h) = &mut s.having {
9677        rewrite_expr_clock(h, now);
9678    }
9679    for o in &mut s.order_by {
9680        rewrite_expr_clock(&mut o.expr, now);
9681    }
9682    for (_, peer) in &mut s.unions {
9683        rewrite_select_clock(peer, now);
9684    }
9685}
9686
9687/// v3.0.3 hot path: every recursion lands in exactly one `match` arm.
9688/// Literal / Column-with-qualifier (the dominant cases on a typical
9689/// AST) take a single pattern dispatch and exit. The clock-rewrite
9690/// targets (zero-arg `NOW` / `CURRENT_TIMESTAMP` / `CURRENT_DATE`
9691/// functions, and bare `CURRENT_TIMESTAMP` / `CURRENT_DATE` column
9692/// refs) sit on their own arms with match guards so the fall-through
9693/// to the recursive arms is unambiguous.
9694fn rewrite_expr_clock(e: &mut Expr, now: i64) {
9695    // Fast-path test on the no-recursion shapes first. We can't fold
9696    // them into the big match below because they need to *replace* `e`
9697    // outright; the recursive arms below match on its sub-fields.
9698    if let Some(replacement) = clock_replacement_for(e, now) {
9699        *e = replacement;
9700        return;
9701    }
9702    match e {
9703        Expr::Binary { lhs, rhs, .. } => {
9704            rewrite_expr_clock(lhs, now);
9705            rewrite_expr_clock(rhs, now);
9706        }
9707        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
9708            rewrite_expr_clock(expr, now);
9709        }
9710        Expr::FunctionCall { args, .. } => {
9711            for a in args {
9712                rewrite_expr_clock(a, now);
9713            }
9714        }
9715        Expr::Like { expr, pattern, .. } => {
9716            rewrite_expr_clock(expr, now);
9717            rewrite_expr_clock(pattern, now);
9718        }
9719        Expr::Extract { source, .. } => rewrite_expr_clock(source, now),
9720        // v4.10 subquery nodes — recurse into the inner SELECT's
9721        // expression slots so e.g. SELECT NOW() in a scalar
9722        // subquery picks up the same instant as the outer query.
9723        Expr::ScalarSubquery(s) => rewrite_select_clock(s, now),
9724        Expr::Exists { subquery, .. } => rewrite_select_clock(subquery, now),
9725        Expr::InSubquery { expr, subquery, .. } => {
9726            rewrite_expr_clock(expr, now);
9727            rewrite_select_clock(subquery, now);
9728        }
9729        // v4.12 window functions — args + PARTITION BY + ORDER BY
9730        // may all reference clock literals.
9731        Expr::WindowFunction {
9732            args,
9733            partition_by,
9734            order_by,
9735            ..
9736        } => {
9737            for a in args {
9738                rewrite_expr_clock(a, now);
9739            }
9740            for p in partition_by {
9741                rewrite_expr_clock(p, now);
9742            }
9743            for (e, _) in order_by {
9744                rewrite_expr_clock(e, now);
9745            }
9746        }
9747        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => {}
9748        Expr::Array(items) => {
9749            for elem in items {
9750                rewrite_expr_clock(elem, now);
9751            }
9752        }
9753        Expr::ArraySubscript { target, index } => {
9754            rewrite_expr_clock(target, now);
9755            rewrite_expr_clock(index, now);
9756        }
9757        Expr::AnyAll { expr, array, .. } => {
9758            rewrite_expr_clock(expr, now);
9759            rewrite_expr_clock(array, now);
9760        }
9761        Expr::Case {
9762            operand,
9763            branches,
9764            else_branch,
9765        } => {
9766            if let Some(o) = operand {
9767                rewrite_expr_clock(o, now);
9768            }
9769            for (w, t) in branches {
9770                rewrite_expr_clock(w, now);
9771                rewrite_expr_clock(t, now);
9772            }
9773            if let Some(e) = else_branch {
9774                rewrite_expr_clock(e, now);
9775            }
9776        }
9777    }
9778}
9779
9780/// Returns `Some(Expr)` when `e` is one of the clock-call shapes that
9781/// must be rewritten; otherwise `None` so the caller falls through to
9782/// the recursive walk. Identifies both function-call forms (`NOW()` /
9783/// `CURRENT_TIMESTAMP()` / `CURRENT_DATE()`) and bare-identifier forms
9784/// (`CURRENT_TIMESTAMP` / `CURRENT_DATE` as unqualified column refs,
9785/// which is how PG accepts them without parens).
9786fn clock_replacement_for(e: &Expr, now: i64) -> Option<Expr> {
9787    let (kind, name) = match e {
9788        Expr::FunctionCall { name, args } if args.is_empty() => (ClockSite::Fn, name.as_str()),
9789        Expr::Column(c) if c.qualifier.is_none() => (ClockSite::BareIdent, c.name.as_str()),
9790        _ => return None,
9791    };
9792    // ASCII case-insensitive name match. Limited to the three keywords
9793    // that actually need rewriting.
9794    let matched = match name.len() {
9795        3 if kind == ClockSite::Fn && name.eq_ignore_ascii_case("now") => Some(true),
9796        12 if name.eq_ignore_ascii_case("current_date") => Some(false),
9797        17 if name.eq_ignore_ascii_case("current_timestamp") => Some(true),
9798        _ => None,
9799    };
9800    let is_timestamp = matched?;
9801    let payload = if is_timestamp {
9802        now
9803    } else {
9804        now.div_euclid(86_400_000_000)
9805    };
9806    let target = if is_timestamp {
9807        spg_sql::ast::CastTarget::Timestamp
9808    } else {
9809        spg_sql::ast::CastTarget::Date
9810    };
9811    Some(Expr::Cast {
9812        expr: alloc::boxed::Box::new(Expr::Literal(spg_sql::ast::Literal::Integer(payload))),
9813        target,
9814    })
9815}
9816
/// Syntactic position in which a candidate clock keyword was found.
/// `clock_replacement_for` uses it to accept `now` only in call form.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum ClockSite {
    /// A function call with an empty argument list, e.g. `NOW()`.
    Fn,
    /// An unqualified column reference, e.g. bare `CURRENT_DATE`.
    BareIdent,
}
9822
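// NOTE: the four lines below describe `resolve_order_by_position` (defined
// after `expand_group_by_all`). They are kept as a plain comment so rustdoc
// does not attach them to the wrong function.
//
// `ORDER BY <integer>` references the N-th SELECT item (1-based).
// Swap the integer literal for the matching item's expression so the
// executor doesn't need a special-case branch. Recurses into UNION
// peers because each peer keeps its own SELECT list.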
9827/// v6.4.1 — expand `GROUP BY ALL` to every non-aggregate SELECT-list
9828/// item. Mirrors DuckDB / PG 19 semantics. Wildcards (`SELECT * …`)
9829/// are NOT expanded by GROUP BY ALL (PG 19 leaves the wildcard intact
9830/// and groups by whatever explicit non-aggregates remain — none in
9831/// the wildcard-only case, which still works for non-aggregate
9832/// queries).
9833fn expand_group_by_all(s: &mut SelectStatement) {
9834    if !s.group_by_all {
9835        for (_, peer) in &mut s.unions {
9836            expand_group_by_all(peer);
9837        }
9838        return;
9839    }
9840    let mut groups: Vec<Expr> = Vec::new();
9841    for item in &s.items {
9842        if let SelectItem::Expr { expr, .. } = item
9843            && !aggregate::contains_aggregate(expr)
9844        {
9845            groups.push(expr.clone());
9846        }
9847    }
9848    s.group_by = Some(groups);
9849    s.group_by_all = false;
9850    for (_, peer) in &mut s.unions {
9851        expand_group_by_all(peer);
9852    }
9853}
9854
9855fn resolve_order_by_position(s: &mut SelectStatement) {
9856    // v6.4.0 — iterate every ORDER BY key. Position references
9857    // (`ORDER BY 2`) bind to the 1-based projection index;
9858    // identifier references that match a SELECT-list alias bind to
9859    // the projected expression (Step 4 of L3a).
9860    for order in &mut s.order_by {
9861        match &order.expr {
9862            Expr::Literal(Literal::Integer(n)) if *n >= 1 => {
9863                if let Ok(idx_one_based) = usize::try_from(*n) {
9864                    let idx = idx_one_based - 1;
9865                    if idx < s.items.len()
9866                        && let SelectItem::Expr { expr, .. } = &s.items[idx]
9867                    {
9868                        order.expr = expr.clone();
9869                    }
9870                }
9871            }
9872            Expr::Column(c) if c.qualifier.is_none() => {
9873                // Alias-in-ORDER-BY lookup.
9874                for item in &s.items {
9875                    if let SelectItem::Expr {
9876                        expr,
9877                        alias: Some(a),
9878                    } = item
9879                        && a == &c.name
9880                    {
9881                        order.expr = expr.clone();
9882                        break;
9883                    }
9884                }
9885            }
9886            _ => {}
9887        }
9888    }
9889    for (_, peer) in &mut s.unions {
9890        resolve_order_by_position(peer);
9891    }
9892}
9893
9894/// Sort `tagged` by `f64` key, reversing the comparator under DESC.
9895/// Used by the UNION ORDER BY path; per-block paths inline the same
9896/// comparator because they already hold `&OrderBy` directly.
9897/// v3.1.1: partial-sort helper. When `keep` (= offset + limit) is
9898/// strictly less than `tagged.len()`, run `select_nth_unstable_by` to
9899/// partition the prefix in O(n), then sort just that prefix in O(k
9900/// log k). Total O(n + k log k), vs O(n log n) for a full sort. The
9901/// caller decides what `keep` is; passing `None` (no LIMIT) keeps the
9902/// full-sort behaviour.
9903///
9904/// `tagged` holds `(Option<f64>, Row)` (the SELECT path) — `None` keys
9905/// sort last in ascending order, mirroring NULL-sorts-last in SQL.
9906fn partial_sort_tagged(tagged: &mut Vec<(Vec<f64>, Row)>, keep: Option<usize>, descs: &[bool]) {
9907    let cmp = |a: &(Vec<f64>, Row), b: &(Vec<f64>, Row)| cmp_multi_key(&a.0, &b.0, descs);
9908    match keep {
9909        Some(k) if k < tagged.len() && k > 0 => {
9910            let pivot = k - 1;
9911            tagged.select_nth_unstable_by(pivot, cmp);
9912            tagged[..k].sort_by(cmp);
9913            tagged.truncate(k);
9914        }
9915        _ => {
9916            tagged.sort_by(cmp);
9917        }
9918    }
9919}
9920
9921fn sort_by_keys(tagged: &mut [(Vec<f64>, Row)], descs: &[bool]) {
9922    tagged.sort_by(|a, b| cmp_multi_key(&a.0, &b.0, descs));
9923}
9924
/// v6.4.0 — multi-key ORDER BY comparator.
///
/// Compares `a` and `b` key by key (pairing stops at the shorter slice) and
/// returns the first non-equal result; if every pair ties the result is
/// `Equal`. `descs[i]` reverses the comparison of key `i`; a missing flag
/// means ascending.
///
/// NULL is encoded by the key builder as `f64::INFINITY`, so it sorts last
/// in ASC and first in DESC (matches PG default).
///
/// NaN keys are given a fixed place so the comparator is a total order: a
/// NaN compares greater than every non-NaN key (including the `INFINITY`
/// NULL sentinel) and equal to another NaN. Treating NaN as equal to
/// everything, as the previous version did, is not transitive; the sort
/// result then depends on input order and the standard-library sort
/// routines are allowed to panic on such a comparator.
fn cmp_multi_key(a: &[f64], b: &[f64], descs: &[bool]) -> core::cmp::Ordering {
    use core::cmp::Ordering;
    for (i, (ka, kb)) in a.iter().zip(b.iter()).enumerate() {
        // `partial_cmp` is `None` only when at least one side is NaN; fall
        // back to ordering by "is NaN" (false < true) in that case.
        let ord = ka
            .partial_cmp(kb)
            .unwrap_or_else(|| ka.is_nan().cmp(&kb.is_nan()));
        let ord = if descs.get(i).copied().unwrap_or(false) {
            ord.reverse()
        } else {
            ord
        };
        if ord != Ordering::Equal {
            return ord;
        }
    }
    Ordering::Equal
}
9943
9944/// v6.4.0 — eval every ORDER BY expression for a row and pack the
9945/// resulting keys into a `Vec<f64>`. NULL → `f64::INFINITY`.
9946fn build_order_keys(
9947    order_by: &[OrderBy],
9948    row: &Row,
9949    ctx: &EvalContext,
9950) -> Result<Vec<f64>, EngineError> {
9951    let mut keys = Vec::with_capacity(order_by.len());
9952    for o in order_by {
9953        let v = eval::eval_expr(&o.expr, row, ctx)?;
9954        keys.push(value_to_order_key(&v)?);
9955    }
9956    Ok(keys)
9957}
9958
9959/// Drop the first `offset` rows then truncate to `limit`. PG / `MySQL`
9960/// agree: OFFSET applies *after* ORDER BY but *before* LIMIT (so
9961/// `LIMIT 10 OFFSET 5` keeps rows 6..=15).
9962fn apply_offset_and_limit(rows: &mut Vec<Row>, offset: Option<u32>, limit: Option<u32>) {
9963    if let Some(off) = offset {
9964        let off = off as usize;
9965        if off >= rows.len() {
9966            rows.clear();
9967        } else {
9968            rows.drain(..off);
9969        }
9970    }
9971    if let Some(n) = limit {
9972        rows.truncate(n as usize);
9973    }
9974}
9975
9976/// v7.6.1 — resolve a parser-level `ForeignKeyConstraint` (column
9977/// names + parent table name) into the storage-layer shape (column
9978/// indices + same parent table). Validates everything the engine
9979/// needs to know about the FK at CREATE TABLE time:
9980///
9981///   - parent table exists (catalog lookup, unless self-referencing)
9982///   - parent columns exist on the parent table
9983///   - parent column list matches the local arity (defaults to the
9984///     parent's primary index column when omitted)
9985///   - parent columns are covered by a `BTree` UNIQUE-class index
9986///     (SPG's stand-in for `PRIMARY KEY`/`UNIQUE`) — required so
9987///     the v7.6.2 INSERT path can do an O(log n) parent lookup
9988///   - local columns exist on the table being created
9989fn resolve_foreign_key(
9990    local_table_name: &str,
9991    local_cols: &[ColumnSchema],
9992    fk: spg_sql::ast::ForeignKeyConstraint,
9993    catalog: &Catalog,
9994) -> Result<spg_storage::ForeignKeyConstraint, EngineError> {
9995    // Resolve local columns.
9996    let mut local_columns = Vec::with_capacity(fk.columns.len());
9997    for name in &fk.columns {
9998        let pos = local_cols
9999            .iter()
10000            .position(|c| c.name == *name)
10001            .ok_or_else(|| {
10002                EngineError::Unsupported(alloc::format!(
10003                    "FOREIGN KEY references unknown local column {name:?}"
10004                ))
10005            })?;
10006        local_columns.push(pos);
10007    }
10008    // Self-referencing FK: parent table is the one we're creating.
10009    // The parent column resolution uses the local column list since
10010    // the catalog doesn't have this table yet.
10011    let is_self_ref = fk.parent_table == local_table_name;
10012    let (parent_cols_for_lookup, parent_table_str): (&[ColumnSchema], &str) = if is_self_ref {
10013        (local_cols, local_table_name)
10014    } else {
10015        let parent_table = catalog.get(&fk.parent_table).ok_or_else(|| {
10016            EngineError::Storage(StorageError::TableNotFound {
10017                name: fk.parent_table.clone(),
10018            })
10019        })?;
10020        (
10021            parent_table.schema().columns.as_slice(),
10022            fk.parent_table.as_str(),
10023        )
10024    };
10025    // Resolve parent column names → positions. If the FK omitted the
10026    // parent column list, fall back to the parent's primary index
10027    // column (single-column only — composite default is rejected
10028    // because there's no unambiguous "PK" in SPG's index list).
10029    let parent_columns: Vec<usize> = if fk.parent_columns.is_empty() {
10030        if fk.columns.len() != 1 {
10031            return Err(EngineError::Unsupported(
10032                "composite FOREIGN KEY without explicit parent column list is not supported \
10033                 — list the parent columns explicitly"
10034                    .into(),
10035            ));
10036        }
10037        // Find a single BTree index on the parent and use its column.
10038        let pos = pick_pk_index_column(catalog, parent_table_str, is_self_ref, local_cols)
10039            .ok_or_else(|| {
10040                EngineError::Unsupported(alloc::format!(
10041                    "parent table {parent_table_str:?} has no PRIMARY-key / UNIQUE BTree index \
10042                     to default the FOREIGN KEY against"
10043                ))
10044            })?;
10045        alloc::vec![pos]
10046    } else {
10047        let mut out = Vec::with_capacity(fk.parent_columns.len());
10048        for name in &fk.parent_columns {
10049            let pos = parent_cols_for_lookup
10050                .iter()
10051                .position(|c| c.name == *name)
10052                .ok_or_else(|| {
10053                    EngineError::Unsupported(alloc::format!(
10054                        "FOREIGN KEY references unknown parent column \
10055                         {name:?} on table {parent_table_str:?}"
10056                    ))
10057                })?;
10058            out.push(pos);
10059        }
10060        out
10061    };
10062    if parent_columns.len() != local_columns.len() {
10063        return Err(EngineError::Unsupported(alloc::format!(
10064            "FOREIGN KEY arity mismatch: {} local columns vs {} parent columns",
10065            local_columns.len(),
10066            parent_columns.len()
10067        )));
10068    }
10069    // For non-self-referencing FKs, verify the parent column set is
10070    // covered by a BTree index. SPG doesn't have a `PRIMARY KEY`
10071    // declaration; the convention is "the parent column for FK
10072    // purposes must have a BTree index" — which the user creates via
10073    // `CREATE INDEX ... USING btree (col)` (the default). We accept
10074    // any single-column BTree index that covers a parent column;
10075    // composite parent column lists require an index whose `column_position`
10076    // matches the first parent column (multi-column BTree indices
10077    // are not in the v7.x roadmap).
10078    if !is_self_ref {
10079        let parent_table = catalog.get(&fk.parent_table).expect("checked above");
10080        let primary_parent_col = parent_columns[0];
10081        let has_btree = parent_table
10082            .schema()
10083            .columns
10084            .get(primary_parent_col)
10085            .is_some()
10086            && parent_table.indices().iter().any(|idx| {
10087                matches!(idx.kind, spg_storage::IndexKind::BTree(_))
10088                    && idx.column_position == primary_parent_col
10089                    && idx.partial_predicate.is_none()
10090            });
10091        if !has_btree {
10092            return Err(EngineError::Unsupported(alloc::format!(
10093                "FOREIGN KEY parent column on {:?} is not covered by an unconditional BTree \
10094                 index — create one with `CREATE INDEX ... ON {} ({})` first",
10095                parent_table_str,
10096                parent_table_str,
10097                parent_table.schema().columns[primary_parent_col].name,
10098            )));
10099        }
10100    }
10101    let on_delete = fk_action_sql_to_storage(fk.on_delete);
10102    let on_update = fk_action_sql_to_storage(fk.on_update);
10103    Ok(spg_storage::ForeignKeyConstraint {
10104        name: fk.name,
10105        local_columns,
10106        parent_table: fk.parent_table,
10107        parent_columns,
10108        on_delete,
10109        on_update,
10110    })
10111}
10112
10113/// v7.6.1 — pick a sentinel "primary key" column from the parent
10114/// table when the FK didn't name parent columns. Picks the first
10115/// single-column unconditional BTree index — that's the closest
10116/// thing SPG has to a PRIMARY KEY today. Self-referencing FKs use
10117/// `local_cols` as the column source.
10118fn pick_pk_index_column(
10119    catalog: &Catalog,
10120    parent_name: &str,
10121    is_self_ref: bool,
10122    local_cols: &[ColumnSchema],
10123) -> Option<usize> {
10124    if is_self_ref {
10125        // Self-ref FK omitted parent columns: pick column 0 by
10126        // convention (no catalog entry yet). Engine will widen this
10127        // when v7.6.7 lands; v7.6.1 only handles the explicit form.
10128        let _ = local_cols;
10129        return Some(0);
10130    }
10131    let parent = catalog.get(parent_name)?;
10132    parent.indices().iter().find_map(|idx| {
10133        if matches!(idx.kind, spg_storage::IndexKind::BTree(_))
10134            && idx.partial_predicate.is_none()
10135            && idx.included_columns.is_empty()
10136            && idx.expression.is_none()
10137        {
10138            Some(idx.column_position)
10139        } else {
10140            None
10141        }
10142    })
10143}
10144
10145/// v7.9.8 / v7.9.10 — resolve the column positions that
10146/// identify a conflict for ON CONFLICT. Returns a Vec of
10147/// column positions (1 element for single-column form, N for
10148/// composite). When the user wrote bare `ON CONFLICT DO …`,
10149/// falls back to the table's first unconditional BTree index
10150/// (always single-column today).
10151fn resolve_on_conflict_columns(
10152    catalog: &Catalog,
10153    table_name: &str,
10154    target: &[String],
10155) -> Result<Vec<usize>, EngineError> {
10156    let table = catalog.get(table_name).ok_or_else(|| {
10157        EngineError::Storage(StorageError::TableNotFound {
10158            name: table_name.into(),
10159        })
10160    })?;
10161    if target.is_empty() {
10162        // v7.13.2 — mailrs round-6 S5 follow-up. Composite UNIQUE
10163        // constraints carry a multi-column tuple; the prior code
10164        // path picked only the leading column of the first BTree
10165        // index, which caused `ON CONFLICT DO NOTHING` to dedup
10166        // by leading column alone (3 rows with same group_id but
10167        // different permission collapsed to 1). PG semantics use
10168        // the full tuple. Prefer a UniquenessConstraint's full
10169        // column list when one exists; fall back to the leading
10170        // BTree column for legacy single-column UNIQUE.
10171        if let Some(uc) = table.schema().uniqueness_constraints.first() {
10172            return Ok(uc.columns.clone());
10173        }
10174        let pos = table
10175            .indices()
10176            .iter()
10177            .find_map(|idx| {
10178                if matches!(idx.kind, spg_storage::IndexKind::BTree(_))
10179                    && idx.partial_predicate.is_none()
10180                    && idx.included_columns.is_empty()
10181                    && idx.expression.is_none()
10182                {
10183                    Some(idx.column_position)
10184                } else {
10185                    None
10186                }
10187            })
10188            .ok_or_else(|| {
10189                EngineError::Unsupported(alloc::format!(
10190                    "ON CONFLICT without target requires a UNIQUE BTree index on {table_name:?}"
10191                ))
10192            })?;
10193        return Ok(alloc::vec![pos]);
10194    }
10195    let mut out = Vec::with_capacity(target.len());
10196    for name in target {
10197        let pos = table
10198            .schema()
10199            .columns
10200            .iter()
10201            .position(|c| c.name == *name)
10202            .ok_or_else(|| {
10203                EngineError::Unsupported(alloc::format!(
10204                    "ON CONFLICT target column {name:?} not found on {table_name:?}"
10205                ))
10206            })?;
10207        out.push(pos);
10208    }
10209    Ok(out)
10210}
10211
10212/// v7.9.8 — check whether the BTree index on `column_pos` of
10213/// `table_name` already has a row with this key.
10214fn on_conflict_key_exists(
10215    catalog: &Catalog,
10216    table_name: &str,
10217    column_pos: usize,
10218    key: &Value,
10219) -> bool {
10220    let Some(table) = catalog.get(table_name) else {
10221        return false;
10222    };
10223    let Some(idx_key) = spg_storage::IndexKey::from_value(key) else {
10224        return false;
10225    };
10226    table.indices().iter().any(|idx| {
10227        matches!(idx.kind, spg_storage::IndexKind::BTree(_))
10228            && idx.column_position == column_pos
10229            && idx.partial_predicate.is_none()
10230            && !idx.lookup_eq(&idx_key).is_empty()
10231    })
10232}
10233
10234/// v7.9.9 / v7.9.10 — look up an existing row's position by
10235/// matching all `column_positions` against the incoming `key`
10236/// tuple. Single-column shape (one column) reduces to the
10237/// canonical PK lookup; composite shapes scan linearly until
10238/// every position matches.
10239fn lookup_row_position_by_keys(
10240    catalog: &Catalog,
10241    table_name: &str,
10242    column_positions: &[usize],
10243    key: &[&Value],
10244) -> Option<usize> {
10245    let table = catalog.get(table_name)?;
10246    table.rows().iter().position(|r| {
10247        column_positions
10248            .iter()
10249            .enumerate()
10250            .all(|(i, &pos)| r.values.get(pos) == Some(key[i]))
10251    })
10252}
10253
10254/// v7.9.10 — does the table already contain a row whose
10255/// `column_positions` tuple equals `key`? Single-column shape
10256/// uses the existing BTree fast path; composite shapes fall
10257/// back to a row scan.
10258fn on_conflict_keys_exist(
10259    catalog: &Catalog,
10260    table_name: &str,
10261    column_positions: &[usize],
10262    key: &[&Value],
10263) -> bool {
10264    if column_positions.len() == 1 {
10265        return on_conflict_key_exists(catalog, table_name, column_positions[0], key[0]);
10266    }
10267    let Some(table) = catalog.get(table_name) else {
10268        return false;
10269    };
10270    table.rows().iter().any(|r| {
10271        column_positions
10272            .iter()
10273            .enumerate()
10274            .all(|(i, &pos)| r.values.get(pos) == Some(key[i]))
10275    })
10276}
10277
10278/// v7.9.9 — apply ON CONFLICT DO UPDATE SET assignments to an
10279/// existing row.
10280///
10281/// `incoming` is the rejected INSERT row (used to resolve
10282/// `EXCLUDED.col` references in the assignment exprs);
10283/// `target_pos` is the position of the existing row in the table.
10284/// Each assignment substitutes `EXCLUDED.col` with the matching
10285/// incoming value, evaluates the resulting expression against
10286/// the existing row, and writes the new value into the
10287/// corresponding column of the returned `Vec<Value>`. If
10288/// `where_` evaluates falsy, returns Ok(None) — PG behaviour:
10289/// the conflicting row is silently kept unchanged.
10290fn apply_on_conflict_assignments(
10291    catalog: &Catalog,
10292    table_name: &str,
10293    target_pos: usize,
10294    incoming: &[Value],
10295    assignments: &[(String, Expr)],
10296    where_: Option<&Expr>,
10297) -> Result<Option<Vec<Value>>, EngineError> {
10298    let table = catalog.get(table_name).ok_or_else(|| {
10299        EngineError::Storage(StorageError::TableNotFound {
10300            name: table_name.into(),
10301        })
10302    })?;
10303    let schema_cols = table.schema().columns.clone();
10304    let existing = table
10305        .rows()
10306        .get(target_pos)
10307        .ok_or_else(|| {
10308            EngineError::Unsupported(alloc::format!(
10309                "ON CONFLICT DO UPDATE: row position {target_pos} out of bounds on {table_name:?}"
10310            ))
10311        })?
10312        .clone();
10313    let ctx = eval::EvalContext::new(&schema_cols, Some(table_name));
10314    // Optional WHERE filter on the conflict row.
10315    if let Some(w) = where_ {
10316        let pred = w.clone();
10317        let pred = substitute_excluded_refs(pred, &schema_cols, incoming);
10318        let v = eval::eval_expr(&pred, &existing, &ctx)?;
10319        if !matches!(v, Value::Bool(true)) {
10320            return Ok(None);
10321        }
10322    }
10323    let mut new_values = existing.values.clone();
10324    for (col_name, expr) in assignments {
10325        let target_idx = schema_cols
10326            .iter()
10327            .position(|c| c.name == *col_name)
10328            .ok_or_else(|| {
10329                EngineError::Eval(EvalError::ColumnNotFound {
10330                    name: col_name.clone(),
10331                })
10332            })?;
10333        let sub = substitute_excluded_refs(expr.clone(), &schema_cols, incoming);
10334        let v = eval::eval_expr(&sub, &existing, &ctx)?;
10335        new_values[target_idx] = coerce_value(v, schema_cols[target_idx].ty, col_name, target_idx)?;
10336    }
10337    Ok(Some(new_values))
10338}
10339
10340/// v7.9.9 — walk an `Expr` tree replacing any `Column { qualifier:
10341/// "EXCLUDED", name }` reference with a `Literal` of the matching
10342/// value from the incoming-row vec. Resolution against the
10343/// child-table column list (by name).
10344fn substitute_excluded_refs(expr: Expr, schema_cols: &[ColumnSchema], incoming: &[Value]) -> Expr {
10345    use spg_sql::ast::ColumnName;
10346    match expr {
10347        Expr::Column(ColumnName { qualifier, name })
10348            if qualifier
10349                .as_deref()
10350                .is_some_and(|q| q.eq_ignore_ascii_case("excluded")) =>
10351        {
10352            let pos = schema_cols.iter().position(|c| c.name == name);
10353            match pos {
10354                Some(p) => {
10355                    let v = incoming.get(p).cloned().unwrap_or(Value::Null);
10356                    value_to_literal_expr(v)
10357                        .unwrap_or_else(|_| Expr::Literal(spg_sql::ast::Literal::Null))
10358                }
10359                None => Expr::Column(ColumnName { qualifier, name }),
10360            }
10361        }
10362        Expr::Binary { op, lhs, rhs } => Expr::Binary {
10363            op,
10364            lhs: Box::new(substitute_excluded_refs(*lhs, schema_cols, incoming)),
10365            rhs: Box::new(substitute_excluded_refs(*rhs, schema_cols, incoming)),
10366        },
10367        Expr::Unary { op, expr } => Expr::Unary {
10368            op,
10369            expr: Box::new(substitute_excluded_refs(*expr, schema_cols, incoming)),
10370        },
10371        Expr::FunctionCall { name, args } => Expr::FunctionCall {
10372            name,
10373            args: args
10374                .into_iter()
10375                .map(|a| substitute_excluded_refs(a, schema_cols, incoming))
10376                .collect(),
10377        },
10378        other => other,
10379    }
10380}
10381
// NOTE(review): the paragraph below documents the INSERT-side FOREIGN KEY
// check, not `enforce_uniqueness_inserts` — it appears to have been
// separated from its function. It is kept as a plain comment so rustdoc
// does not attach it to the uniqueness check; confirm and move it next to
// the FK enforcement function.
//
// v7.6.2 / v7.6.7 — INSERT-side FK enforcement. For every row
// about to be inserted into `child_table`, every FK declared on
// that table is checked: the row's FK columns must either be
// NULL (SQL spec skip) or match an existing parent row via the
// parent's BTree PK / UNIQUE index.
//
// Returns `EngineError::Unsupported` with a `FOREIGN KEY violation`
// payload on first failure.
//
// **Self-referencing FKs (v7.6.7 widening):** when `fk.parent_table
// == child_table`, the parent rows visible to this check are
//  (a) rows already committed to the table, plus
//  (b) earlier rows from the *same* `rows` batch.
// This makes `INSERT INTO tree VALUES (1, NULL), (2, 1), (3, 2)`
// work in a single statement — a common pattern for bulk-loading
// hierarchies.
10398/// v7.9.19 — enforce table-level UNIQUE / PRIMARY KEY tuple
10399/// constraints at INSERT time. For each constraint declared on
10400/// the target table, check that no existing row + no earlier row
10401/// in the same batch has the same full-column tuple. NULL in
10402/// any column lifts the row out of the check (SQL spec: NULL
10403/// ≠ NULL for uniqueness). mailrs G1 + G6.
10404fn enforce_uniqueness_inserts(
10405    catalog: &Catalog,
10406    child_table: &str,
10407    constraints: &[spg_storage::UniquenessConstraint],
10408    rows: &[Vec<Value>],
10409) -> Result<(), EngineError> {
10410    if constraints.is_empty() {
10411        return Ok(());
10412    }
10413    let table = catalog.get(child_table).ok_or_else(|| {
10414        EngineError::Storage(StorageError::TableNotFound {
10415            name: child_table.into(),
10416        })
10417    })?;
10418    for uc in constraints {
10419        for (batch_idx, row_values) in rows.iter().enumerate() {
10420            let key: Vec<&Value> = uc.columns.iter().map(|&i| &row_values[i]).collect();
10421            let has_null = key.iter().any(|v| matches!(v, Value::Null));
10422            // v7.13.0 — `NULLS NOT DISTINCT` (mailrs round-5 G10,
10423            // PG 15+): two rows whose constrained columns are all
10424            // NULL collide. SQL-standard `NULLS DISTINCT` lets any
10425            // NULL skip the check.
10426            if has_null && !uc.nulls_not_distinct {
10427                continue;
10428            }
10429            // Table-side collision: scan existing rows.
10430            let collides_in_table = table.rows().iter().any(|prow| {
10431                uc.columns
10432                    .iter()
10433                    .enumerate()
10434                    .all(|(i, &p)| prow.values.get(p) == Some(key[i]))
10435            });
10436            // Batch-side collision: earlier rows in the same INSERT.
10437            let collides_in_batch = rows[..batch_idx].iter().any(|earlier| {
10438                uc.columns
10439                    .iter()
10440                    .enumerate()
10441                    .all(|(i, &p)| earlier.get(p) == Some(key[i]))
10442            });
10443            if collides_in_table || collides_in_batch {
10444                let kind = if uc.is_primary_key {
10445                    "PRIMARY KEY"
10446                } else {
10447                    "UNIQUE"
10448                };
10449                let col_names: Vec<String> = uc
10450                    .columns
10451                    .iter()
10452                    .map(|&i| table.schema().columns[i].name.clone())
10453                    .collect();
10454                return Err(EngineError::Unsupported(alloc::format!(
10455                    "{kind} violation on {child_table:?} columns {col_names:?}: \
10456                     row #{batch_idx} duplicates an existing key"
10457                )));
10458            }
10459        }
10460    }
10461    Ok(())
10462}
10463
10464/// v7.9.29 — `true` iff `v` counts as a truthy SQL value for a
10465/// WHERE-style predicate. NULL → false (three-valued logic
10466/// collapses to "skip this row" for index inclusion). Numeric
10467/// non-zero, BIGINT non-zero, TINYINT non-zero, BOOLEAN true → true.
10468/// Everything else (strings, vectors, JSON, …) is not a valid
10469/// predicate result and surfaces as `false` so a malformed
10470/// predicate degrades to "row not in index" rather than panicking.
10471fn predicate_truthy(v: &spg_storage::Value) -> bool {
10472    use spg_storage::Value as V;
10473    match v {
10474        V::Bool(b) => *b,
10475        V::Int(n) => *n != 0,
10476        V::BigInt(n) => *n != 0,
10477        V::SmallInt(n) => *n != 0,
10478        _ => false,
10479    }
10480}
10481
10482/// v7.9.29 — at CREATE UNIQUE INDEX time, scan the table's
10483/// committed rows for pre-existing duplicates. If any pair of rows
10484/// matches the predicate AND has the same index key, refuse to
10485/// create the index so the user fixes the data before retrying.
10486fn check_existing_unique_violation(
10487    idx: &spg_storage::Index,
10488    schema: &spg_storage::TableSchema,
10489    rows: &[spg_storage::Row],
10490) -> Result<(), EngineError> {
10491    let predicate_expr = match idx.partial_predicate.as_deref() {
10492        Some(s) => Some(spg_sql::parser::parse_expression(s).map_err(|e| {
10493            EngineError::Unsupported(alloc::format!(
10494                "stored partial predicate {s:?} failed to re-parse: {e:?}"
10495            ))
10496        })?),
10497        None => None,
10498    };
10499    let ctx = eval::EvalContext::new(&schema.columns, None);
10500    let key_positions = unique_key_positions(idx);
10501    let mut seen: alloc::vec::Vec<alloc::vec::Vec<spg_storage::Value>> = alloc::vec::Vec::new();
10502    for row in rows {
10503        if let Some(expr) = &predicate_expr {
10504            let v = eval::eval_expr(expr, row, &ctx).map_err(|e| {
10505                EngineError::Unsupported(alloc::format!(
10506                    "evaluating UNIQUE INDEX predicate against existing row: {e:?}"
10507                ))
10508            })?;
10509            if !predicate_truthy(&v) {
10510                continue;
10511            }
10512        }
10513        let key: alloc::vec::Vec<spg_storage::Value> = key_positions
10514            .iter()
10515            .map(|&p| {
10516                row.values
10517                    .get(p)
10518                    .cloned()
10519                    .unwrap_or(spg_storage::Value::Null)
10520            })
10521            .collect();
10522        if key.iter().any(|v| matches!(v, spg_storage::Value::Null)) {
10523            continue;
10524        }
10525        if seen.iter().any(|other| *other == key) {
10526            return Err(EngineError::Unsupported(alloc::format!(
10527                "CREATE UNIQUE INDEX {:?}: existing rows already violate the constraint",
10528                idx.name
10529            )));
10530        }
10531        seen.push(key);
10532    }
10533    Ok(())
10534}
10535
10536/// v7.9.29 — full key tuple for a UNIQUE INDEX (leading +
10537/// extra positions). For single-column indexes this is just
10538/// `[column_position]`.
10539fn unique_key_positions(idx: &spg_storage::Index) -> alloc::vec::Vec<usize> {
10540    let mut out = alloc::vec::Vec::with_capacity(1 + idx.extra_column_positions.len());
10541    out.push(idx.column_position);
10542    out.extend_from_slice(&idx.extra_column_positions);
10543    out
10544}
10545
10546/// v7.9.29 — at INSERT time, walk every `is_unique` index on the
10547/// target table. For each, eval the index's optional predicate
10548/// against (a) the candidate row and (b) every committed row plus
10549/// earlier batch rows; only rows where the predicate is truthy
10550/// participate. A duplicate key among predicate-matching rows is a
10551/// uniqueness violation. NULL keys lift the row out of the check
10552/// (matching PG's "UNIQUE allows multiple NULLs" semantics).
10553fn enforce_unique_index_inserts(
10554    catalog: &Catalog,
10555    table_name: &str,
10556    rows: &[alloc::vec::Vec<spg_storage::Value>],
10557) -> Result<(), EngineError> {
10558    let table = catalog.get(table_name).ok_or_else(|| {
10559        EngineError::Storage(StorageError::TableNotFound {
10560            name: table_name.into(),
10561        })
10562    })?;
10563    let schema = table.schema();
10564    let ctx = eval::EvalContext::new(&schema.columns, None);
10565    for idx in table.indices() {
10566        if !idx.is_unique {
10567            continue;
10568        }
10569        // Re-parse the predicate once per index per batch.
10570        let predicate_expr = match idx.partial_predicate.as_deref() {
10571            Some(s) => Some(spg_sql::parser::parse_expression(s).map_err(|e| {
10572                EngineError::Unsupported(alloc::format!(
10573                    "UNIQUE INDEX {:?} predicate {s:?} failed to re-parse: {e:?}",
10574                    idx.name
10575                ))
10576            })?),
10577            None => None,
10578        };
10579        let key_positions = unique_key_positions(idx);
10580        let key_of = |values: &[spg_storage::Value]| -> alloc::vec::Vec<spg_storage::Value> {
10581            key_positions
10582                .iter()
10583                .map(|&p| values.get(p).cloned().unwrap_or(spg_storage::Value::Null))
10584                .collect()
10585        };
10586        // Helper: does `values` participate in this index? (predicate
10587        // truthy when present.) Wraps `values` into a transient Row
10588        // because eval_expr requires &Row.
10589        let participates = |values: &[spg_storage::Value]| -> Result<bool, EngineError> {
10590            let Some(expr) = &predicate_expr else {
10591                return Ok(true);
10592            };
10593            let tmp_row = spg_storage::Row {
10594                values: values.to_vec(),
10595            };
10596            let v = eval::eval_expr(expr, &tmp_row, &ctx).map_err(|e| {
10597                EngineError::Unsupported(alloc::format!(
10598                    "UNIQUE INDEX {:?} predicate eval: {e:?}",
10599                    idx.name
10600                ))
10601            })?;
10602            Ok(predicate_truthy(&v))
10603        };
10604        for (batch_idx, row_values) in rows.iter().enumerate() {
10605            if !participates(row_values)? {
10606                continue;
10607            }
10608            let key = key_of(row_values);
10609            if key.iter().any(|v| matches!(v, spg_storage::Value::Null)) {
10610                continue;
10611            }
10612            // Committed-table collision.
10613            for prow in table.rows() {
10614                if !participates(&prow.values)? {
10615                    continue;
10616                }
10617                if key_of(&prow.values) == key {
10618                    return Err(EngineError::Unsupported(alloc::format!(
10619                        "UNIQUE INDEX {:?} violation on {table_name:?}: \
10620                         row #{batch_idx} duplicates an existing key",
10621                        idx.name
10622                    )));
10623                }
10624            }
10625            // Within-batch collision: earlier rows in the same INSERT.
10626            for earlier in &rows[..batch_idx] {
10627                if !participates(earlier)? {
10628                    continue;
10629                }
10630                if key_of(earlier) == key {
10631                    return Err(EngineError::Unsupported(alloc::format!(
10632                        "UNIQUE INDEX {:?} violation on {table_name:?}: \
10633                         row #{batch_idx} duplicates an earlier row in the same batch",
10634                        idx.name
10635                    )));
10636                }
10637            }
10638        }
10639    }
10640    Ok(())
10641}
10642
10643/// v7.13.0 — `UPDATE OF cols` filter helper (mailrs round-5 G7).
10644/// Returns `true` when at least one of `filter_cols` has a
10645/// different value in `new_row` vs `old_row`. Column lookup is
10646/// case-insensitive against `schema_cols`; unknown filter columns
10647/// are treated as "not changed" (the trigger therefore won't
10648/// fire on them — surfacing a parse-time error would be too
10649/// strict for catalog reloads where the schema may have drifted).
10650fn any_column_changed(
10651    filter_cols: &[String],
10652    schema_cols: &[ColumnSchema],
10653    old_row: &Row,
10654    new_row: &Row,
10655) -> bool {
10656    for col_name in filter_cols {
10657        let Some(pos) = schema_cols
10658            .iter()
10659            .position(|c| c.name.eq_ignore_ascii_case(col_name))
10660        else {
10661            continue;
10662        };
10663        let old_v = old_row.values.get(pos);
10664        let new_v = new_row.values.get(pos);
10665        if old_v != new_v {
10666            return true;
10667        }
10668    }
10669    false
10670}
10671
10672/// v7.13.0 — evaluate every CHECK predicate on the schema against
10673/// each candidate row. Mirrors PG semantics: a `false` result
10674/// rejects the mutation; a NULL result *passes* (CHECK rejects
10675/// only on definite-false, not on unknown). mailrs round-5 G3.
10676fn enforce_check_constraints(
10677    catalog: &Catalog,
10678    table_name: &str,
10679    rows: &[alloc::vec::Vec<spg_storage::Value>],
10680) -> Result<(), EngineError> {
10681    let table = catalog.get(table_name).ok_or_else(|| {
10682        EngineError::Storage(StorageError::TableNotFound {
10683            name: table_name.into(),
10684        })
10685    })?;
10686    let schema = table.schema();
10687    if schema.checks.is_empty() {
10688        return Ok(());
10689    }
10690    let ctx = eval::EvalContext::new(&schema.columns, None);
10691    let mut parsed: alloc::vec::Vec<(usize, Expr)> = alloc::vec::Vec::new();
10692    for (i, src) in schema.checks.iter().enumerate() {
10693        let expr = spg_sql::parser::parse_expression(src).map_err(|e| {
10694            EngineError::Unsupported(alloc::format!(
10695                "CHECK constraint #{i} on {table_name:?} ({src:?}) failed to re-parse: {e:?}"
10696            ))
10697        })?;
10698        parsed.push((i, expr));
10699    }
10700    for (batch_idx, row_values) in rows.iter().enumerate() {
10701        let tmp_row = spg_storage::Row {
10702            values: row_values.clone(),
10703        };
10704        for (i, expr) in &parsed {
10705            let v = eval::eval_expr(expr, &tmp_row, &ctx).map_err(|e| {
10706                EngineError::Unsupported(alloc::format!(
10707                    "CHECK constraint #{i} on {table_name:?} eval at row #{batch_idx}: {e:?}"
10708                ))
10709            })?;
10710            // PG: NULL passes (CHECK rejects on definite-false only).
10711            if matches!(v, spg_storage::Value::Bool(false)) {
10712                return Err(EngineError::Unsupported(alloc::format!(
10713                    "CHECK constraint violation on {table_name:?} (row #{batch_idx}): {:?}",
10714                    schema.checks[*i]
10715                )));
10716            }
10717        }
10718    }
10719    Ok(())
10720}
10721
10722fn enforce_fk_inserts(
10723    catalog: &Catalog,
10724    child_table: &str,
10725    fks: &[spg_storage::ForeignKeyConstraint],
10726    rows: &[Vec<Value>],
10727) -> Result<(), EngineError> {
10728    for fk in fks {
10729        let parent_is_self = fk.parent_table == child_table;
10730        let parent = if parent_is_self {
10731            // Self-ref: read the current state of the same table.
10732            // The mut borrow on child has been dropped by the caller.
10733            catalog.get(child_table).ok_or_else(|| {
10734                EngineError::Storage(StorageError::TableNotFound {
10735                    name: child_table.into(),
10736                })
10737            })?
10738        } else {
10739            catalog.get(&fk.parent_table).ok_or_else(|| {
10740                EngineError::Storage(StorageError::TableNotFound {
10741                    name: fk.parent_table.clone(),
10742                })
10743            })?
10744        };
10745        for (batch_idx, row_values) in rows.iter().enumerate() {
10746            // Single-column FK fast path: try the parent's BTree
10747            // index for an O(log n) lookup. Composite FKs fall back
10748            // to a parent-row scan.
10749            if fk.local_columns.len() == 1 {
10750                let v = &row_values[fk.local_columns[0]];
10751                if matches!(v, Value::Null) {
10752                    continue;
10753                }
10754                let parent_col = fk.parent_columns[0];
10755                let key = spg_storage::IndexKey::from_value(v).ok_or_else(|| {
10756                    EngineError::Unsupported(alloc::format!(
10757                        "FOREIGN KEY column value of type {:?} is not index-eligible",
10758                        v.data_type()
10759                    ))
10760                })?;
10761                let present_committed = parent.indices().iter().any(|idx| {
10762                    matches!(idx.kind, spg_storage::IndexKind::BTree(_))
10763                        && idx.column_position == parent_col
10764                        && idx.partial_predicate.is_none()
10765                        && !idx.lookup_eq(&key).is_empty()
10766                });
10767                // v7.6.7 self-ref widening: also accept a match
10768                // against earlier rows in this same batch when the
10769                // FK points at the table being inserted into.
10770                let present_in_batch = parent_is_self
10771                    && rows[..batch_idx]
10772                        .iter()
10773                        .any(|earlier| earlier.get(parent_col) == Some(v));
10774                if !(present_committed || present_in_batch) {
10775                    return Err(EngineError::Unsupported(alloc::format!(
10776                        "FOREIGN KEY violation: no parent row in {:?} where {} = {:?}",
10777                        fk.parent_table,
10778                        parent
10779                            .schema()
10780                            .columns
10781                            .get(parent_col)
10782                            .map_or("?", |c| c.name.as_str()),
10783                        v,
10784                    )));
10785                }
10786            } else {
10787                // Composite FK: scan parent rows. v7.6.7 also
10788                // accepts a match against earlier rows in the same
10789                // batch (self-ref bulk-loading of hierarchies).
10790                if fk
10791                    .local_columns
10792                    .iter()
10793                    .all(|&i| matches!(row_values.get(i), Some(Value::Null)))
10794                {
10795                    continue;
10796                }
10797                let local: Vec<&Value> = fk.local_columns.iter().map(|&i| &row_values[i]).collect();
10798                let parent_match_committed = parent.rows().iter().any(|prow| {
10799                    fk.parent_columns
10800                        .iter()
10801                        .enumerate()
10802                        .all(|(i, &pi)| prow.values.get(pi) == Some(local[i]))
10803                });
10804                let parent_match_in_batch = parent_is_self
10805                    && rows[..batch_idx].iter().any(|earlier| {
10806                        fk.parent_columns
10807                            .iter()
10808                            .enumerate()
10809                            .all(|(i, &pi)| earlier.get(pi) == Some(local[i]))
10810                    });
10811                if !(parent_match_committed || parent_match_in_batch) {
10812                    return Err(EngineError::Unsupported(alloc::format!(
10813                        "FOREIGN KEY violation: no parent row in {:?} matching composite key",
10814                        fk.parent_table,
10815                    )));
10816                }
10817            }
10818        }
10819    }
10820    Ok(())
10821}
10822
/// v7.6.4 / v7.6.5 — one step of the FK action plan computed for a
/// DELETE on a parent. The plan is a list of these steps, stacked
/// across the FK graph by `plan_fk_parent_deletions`.
///
/// `plan_fk_parent_updates` returns a list of the same step type.
#[derive(Debug, Clone)]
struct FkChildStep {
    /// Name of the child table this step applies to.
    child_table: String,
    /// What to do to the selected rows/cells of `child_table`.
    action: FkChildAction,
}
10831
/// The mutation an [`FkChildStep`] applies to its child table.
///
/// In `SetNull` and `SetDefault` the vectors are parallel: entry `i`
/// of `positions` (row index) pairs with entry `i` of `columns`
/// (column index) and, for `SetDefault`, entry `i` of `defaults`.
#[derive(Debug, Clone)]
enum FkChildAction {
    /// CASCADE — remove these rows. Sorted, deduplicated positions.
    Delete { positions: Vec<usize> },
    /// SET NULL — for each (row, column) in the flat list, write
    /// NULL into that child cell. Multiple FKs on the same row may
    /// produce overlapping entries (deduped at plan time).
    SetNull {
        /// Row index of each cell to NULL.
        positions: Vec<usize>,
        /// Column index of each cell to NULL (parallel to `positions`).
        columns: Vec<usize>,
    },
    /// SET DEFAULT — same shape as SetNull but writes the column's
    /// declared DEFAULT value (resolved at plan time). Columns
    /// without a DEFAULT raise an error during planning.
    SetDefault {
        /// Row index of each cell to reset.
        positions: Vec<usize>,
        /// Column index of each cell to reset (parallel to `positions`).
        columns: Vec<usize>,
        /// Value to write into each cell (parallel to `positions`).
        defaults: Vec<Value>,
    },
}
10852
10853/// v7.6.3 → v7.6.5 — plan FK fallout for a DELETE on a parent table.
10854///
10855/// Walks every table in the catalog looking for FKs whose
10856/// `parent_table` is `parent_table_name`. For each such FK + each
10857/// to-be-deleted parent row:
10858///
10859///   - RESTRICT / NoAction → error, no plan returned
10860///   - CASCADE → child rows get scheduled for deletion; recursive
10861///   - SetNull → child FK column(s) scheduled to be NULL-ed.
10862///     Verified NULL-able at plan time.
10863///   - SetDefault → child FK column(s) scheduled to be reset to
10864///     their declared DEFAULT. Columns without a DEFAULT raise.
10865///
10866/// SET NULL / SET DEFAULT do NOT cascade further — the child row
10867/// stays; only one of its columns mutates.
10868fn plan_fk_parent_deletions(
10869    catalog: &Catalog,
10870    parent_table_name: &str,
10871    to_delete_positions: &[usize],
10872    to_delete_rows: &[Vec<Value>],
10873) -> Result<Vec<FkChildStep>, EngineError> {
10874    use alloc::collections::{BTreeMap, BTreeSet};
10875    if to_delete_rows.is_empty() {
10876        return Ok(Vec::new());
10877    }
10878    let mut delete_plan: BTreeMap<String, BTreeSet<usize>> = BTreeMap::new();
10879    // setnull / setdefault keyed by child_table → (row_idx, col_idx) → optional default
10880    let mut setnull_plan: BTreeMap<String, BTreeSet<(usize, usize)>> = BTreeMap::new();
10881    let mut setdefault_plan: BTreeMap<String, BTreeMap<(usize, usize), Value>> = BTreeMap::new();
10882    let mut visited: BTreeSet<(String, usize)> = BTreeSet::new();
10883    for &p in to_delete_positions {
10884        visited.insert((parent_table_name.to_string(), p));
10885    }
10886    let mut work: Vec<(String, Vec<Value>)> = to_delete_rows
10887        .iter()
10888        .map(|r| (parent_table_name.to_string(), r.clone()))
10889        .collect();
10890    while let Some((cur_parent, parent_row)) = work.pop() {
10891        for child_name in catalog.table_names() {
10892            let child = catalog
10893                .get(&child_name)
10894                .expect("table_names → catalog.get round-trip is total");
10895            for fk in &child.schema().foreign_keys {
10896                if fk.parent_table != cur_parent {
10897                    continue;
10898                }
10899                let parent_key: Vec<&Value> = fk
10900                    .parent_columns
10901                    .iter()
10902                    .map(|&pi| &parent_row[pi])
10903                    .collect();
10904                if parent_key.iter().any(|v| matches!(v, Value::Null)) {
10905                    continue;
10906                }
10907                for (child_row_idx, child_row) in child.rows().iter().enumerate() {
10908                    if child_name == cur_parent
10909                        && visited.contains(&(child_name.clone(), child_row_idx))
10910                    {
10911                        continue;
10912                    }
10913                    let matches_key = fk
10914                        .local_columns
10915                        .iter()
10916                        .enumerate()
10917                        .all(|(i, &li)| child_row.values.get(li) == Some(parent_key[i]));
10918                    if !matches_key {
10919                        continue;
10920                    }
10921                    match fk.on_delete {
10922                        spg_storage::FkAction::Restrict | spg_storage::FkAction::NoAction => {
10923                            return Err(EngineError::Unsupported(alloc::format!(
10924                                "FOREIGN KEY violation: DELETE on {cur_parent:?} is \
10925                                 restricted by FK from {child_name:?}.{:?}",
10926                                fk.local_columns,
10927                            )));
10928                        }
10929                        spg_storage::FkAction::Cascade => {
10930                            if visited.insert((child_name.clone(), child_row_idx)) {
10931                                delete_plan
10932                                    .entry(child_name.clone())
10933                                    .or_default()
10934                                    .insert(child_row_idx);
10935                                work.push((child_name.clone(), child_row.values.clone()));
10936                            }
10937                        }
10938                        spg_storage::FkAction::SetNull => {
10939                            // Verify every local FK column is NULL-able.
10940                            for &li in &fk.local_columns {
10941                                let col = child.schema().columns.get(li).ok_or_else(|| {
10942                                    EngineError::Unsupported(alloc::format!(
10943                                        "FK local column {li} missing in {child_name:?}"
10944                                    ))
10945                                })?;
10946                                if !col.nullable {
10947                                    return Err(EngineError::Unsupported(alloc::format!(
10948                                        "FOREIGN KEY ON DELETE SET NULL: column \
10949                                         {child_name:?}.{:?} is NOT NULL — cannot SET NULL",
10950                                        col.name,
10951                                    )));
10952                                }
10953                            }
10954                            let entry = setnull_plan.entry(child_name.clone()).or_default();
10955                            for &li in &fk.local_columns {
10956                                entry.insert((child_row_idx, li));
10957                            }
10958                        }
10959                        spg_storage::FkAction::SetDefault => {
10960                            // Resolve the DEFAULT for every local FK col.
10961                            let entry = setdefault_plan.entry(child_name.clone()).or_default();
10962                            for &li in &fk.local_columns {
10963                                let col = child.schema().columns.get(li).ok_or_else(|| {
10964                                    EngineError::Unsupported(alloc::format!(
10965                                        "FK local column {li} missing in {child_name:?}"
10966                                    ))
10967                                })?;
10968                                let default = col.default.clone().ok_or_else(|| {
10969                                    EngineError::Unsupported(alloc::format!(
10970                                        "FOREIGN KEY ON DELETE SET DEFAULT: column \
10971                                         {child_name:?}.{:?} has no DEFAULT declared",
10972                                        col.name,
10973                                    ))
10974                                })?;
10975                                entry.insert((child_row_idx, li), default);
10976                            }
10977                        }
10978                    }
10979                }
10980            }
10981        }
10982    }
10983    // Flatten the three plans into the ordered `FkChildStep` list.
10984    // Deletes are applied last per child (after any null/default
10985    // re-writes on the same child) so a child row that's both
10986    // re-written and then cascade-deleted only ends up deleted —
10987    // but in v7.6.5 SetNull/Cascade never overlap on the same row
10988    // (a single FK chooses exactly one action), so the order is
10989    // mostly a precaution.
10990    let mut steps: Vec<FkChildStep> = Vec::new();
10991    for (child_table, entries) in setnull_plan {
10992        let (positions, columns): (Vec<usize>, Vec<usize>) = entries.into_iter().unzip();
10993        steps.push(FkChildStep {
10994            child_table,
10995            action: FkChildAction::SetNull { positions, columns },
10996        });
10997    }
10998    for (child_table, entries) in setdefault_plan {
10999        let mut positions = Vec::with_capacity(entries.len());
11000        let mut columns = Vec::with_capacity(entries.len());
11001        let mut defaults = Vec::with_capacity(entries.len());
11002        for ((p, c), v) in entries {
11003            positions.push(p);
11004            columns.push(c);
11005            defaults.push(v);
11006        }
11007        steps.push(FkChildStep {
11008            child_table,
11009            action: FkChildAction::SetDefault {
11010                positions,
11011                columns,
11012                defaults,
11013            },
11014        });
11015    }
11016    for (child_table, positions) in delete_plan {
11017        steps.push(FkChildStep {
11018            child_table,
11019            action: FkChildAction::Delete {
11020                positions: positions.into_iter().collect(),
11021            },
11022        });
11023    }
11024    Ok(steps)
11025}
11026
11027/// v7.6.6 — plan FK fallout for an UPDATE that mutates parent-side
11028/// PK/UNIQUE columns. Walks every other table whose FK references
11029/// `parent_table_name`; for each FK whose parent_columns overlap a
11030/// mutated column, decides the action by `fk.on_update`.
11031///
11032///   - RESTRICT / NoAction → error if any child references the OLD
11033///     value
11034///   - CASCADE → child FK columns get rewritten to the NEW parent
11035///     value (a SetNull-style update step with the new value)
11036///   - SetNull → child FK columns set to NULL
11037///   - SetDefault → child FK columns set to declared default
11038///
11039/// `plan_with_old` is `(row_position, old_values, new_values)` so
11040/// the planner can detect "did this row's parent key actually
11041/// change?" — only rows where at least one referenced parent
11042/// column moved trigger inbound work.
11043fn plan_fk_parent_updates(
11044    catalog: &Catalog,
11045    parent_table_name: &str,
11046    plan_with_old: &[(usize, Vec<Value>, Vec<Value>)],
11047) -> Result<Vec<FkChildStep>, EngineError> {
11048    use alloc::collections::BTreeMap;
11049    if plan_with_old.is_empty() {
11050        return Ok(Vec::new());
11051    }
11052    // For each child table we may touch, build per-child step
11053    // lists. UPDATE never deletes children — `delete_plan` stays
11054    // empty here but is kept structurally aligned with
11055    // `plan_fk_parent_deletions` for future use.
11056    let delete_plan: BTreeMap<String, alloc::collections::BTreeSet<usize>> = BTreeMap::new();
11057    let mut setnull_plan: BTreeMap<String, alloc::collections::BTreeSet<(usize, usize)>> =
11058        BTreeMap::new();
11059    let mut setdefault_plan: BTreeMap<String, BTreeMap<(usize, usize), Value>> = BTreeMap::new();
11060    // Cascade-update plan: child_table → row_idx → col_idx → new_value
11061    let mut cascade_plan: BTreeMap<String, BTreeMap<(usize, usize), Value>> = BTreeMap::new();
11062
11063    for child_name in catalog.table_names() {
11064        let child = catalog
11065            .get(&child_name)
11066            .expect("table_names → catalog.get total");
11067        for fk in &child.schema().foreign_keys {
11068            if fk.parent_table != parent_table_name {
11069                continue;
11070            }
11071            for (_pos, old_row, new_row) in plan_with_old {
11072                // Did any parent FK column change?
11073                let key_changed = fk
11074                    .parent_columns
11075                    .iter()
11076                    .any(|&pi| old_row.get(pi) != new_row.get(pi));
11077                if !key_changed {
11078                    continue;
11079                }
11080                // The OLD parent key — used to find referring children.
11081                let old_key: Vec<&Value> =
11082                    fk.parent_columns.iter().map(|&pi| &old_row[pi]).collect();
11083                if old_key.iter().any(|v| matches!(v, Value::Null)) {
11084                    // NULL parent has no children — skip.
11085                    continue;
11086                }
11087                let new_key: Vec<&Value> =
11088                    fk.parent_columns.iter().map(|&pi| &new_row[pi]).collect();
11089                for (child_row_idx, child_row) in child.rows().iter().enumerate() {
11090                    // Self-ref same-row updates: a row updating its
11091                    // own PK doesn't restrict itself.
11092                    if child_name == parent_table_name
11093                        && plan_with_old.iter().any(|(p, _, _)| *p == child_row_idx)
11094                    {
11095                        continue;
11096                    }
11097                    let matches_key = fk
11098                        .local_columns
11099                        .iter()
11100                        .enumerate()
11101                        .all(|(i, &li)| child_row.values.get(li) == Some(old_key[i]));
11102                    if !matches_key {
11103                        continue;
11104                    }
11105                    match fk.on_update {
11106                        spg_storage::FkAction::Restrict | spg_storage::FkAction::NoAction => {
11107                            return Err(EngineError::Unsupported(alloc::format!(
11108                                "FOREIGN KEY violation: UPDATE on {parent_table_name:?} PK is \
11109                                 restricted by FK from {child_name:?}.{:?}",
11110                                fk.local_columns,
11111                            )));
11112                        }
11113                        spg_storage::FkAction::Cascade => {
11114                            // Rewrite child FK columns to new key.
11115                            let entry = cascade_plan.entry(child_name.clone()).or_default();
11116                            for (i, &li) in fk.local_columns.iter().enumerate() {
11117                                entry.insert((child_row_idx, li), new_key[i].clone());
11118                            }
11119                        }
11120                        spg_storage::FkAction::SetNull => {
11121                            for &li in &fk.local_columns {
11122                                let col = child.schema().columns.get(li).ok_or_else(|| {
11123                                    EngineError::Unsupported(alloc::format!(
11124                                        "FK local column {li} missing in {child_name:?}"
11125                                    ))
11126                                })?;
11127                                if !col.nullable {
11128                                    return Err(EngineError::Unsupported(alloc::format!(
11129                                        "FOREIGN KEY ON UPDATE SET NULL: column \
11130                                         {child_name:?}.{:?} is NOT NULL",
11131                                        col.name,
11132                                    )));
11133                                }
11134                            }
11135                            let entry = setnull_plan.entry(child_name.clone()).or_default();
11136                            for &li in &fk.local_columns {
11137                                entry.insert((child_row_idx, li));
11138                            }
11139                        }
11140                        spg_storage::FkAction::SetDefault => {
11141                            let entry = setdefault_plan.entry(child_name.clone()).or_default();
11142                            for &li in &fk.local_columns {
11143                                let col = child.schema().columns.get(li).ok_or_else(|| {
11144                                    EngineError::Unsupported(alloc::format!(
11145                                        "FK local column {li} missing in {child_name:?}"
11146                                    ))
11147                                })?;
11148                                let default = col.default.clone().ok_or_else(|| {
11149                                    EngineError::Unsupported(alloc::format!(
11150                                        "FOREIGN KEY ON UPDATE SET DEFAULT: column \
11151                                         {child_name:?}.{:?} has no DEFAULT",
11152                                        col.name,
11153                                    ))
11154                                })?;
11155                                entry.insert((child_row_idx, li), default);
11156                            }
11157                        }
11158                    }
11159                }
11160            }
11161        }
11162    }
11163    // Flatten into FkChildStep list. UPDATE doesn't produce
11164    // DeleteSteps (CASCADE on UPDATE just rewrites FK values).
11165    let mut steps: Vec<FkChildStep> = Vec::new();
11166    for (child_table, entries) in cascade_plan {
11167        let mut positions = Vec::with_capacity(entries.len());
11168        let mut columns = Vec::with_capacity(entries.len());
11169        let mut defaults = Vec::with_capacity(entries.len());
11170        for ((p, c), v) in entries {
11171            positions.push(p);
11172            columns.push(c);
11173            defaults.push(v);
11174        }
11175        // We reuse `FkChildAction::SetDefault` for cascade-update:
11176        // both shapes are "write a known value into specific cells"
11177        // — `apply_per_cell_writes` doesn't care whether the value
11178        // came from a DEFAULT declaration or a new parent key.
11179        steps.push(FkChildStep {
11180            child_table,
11181            action: FkChildAction::SetDefault {
11182                positions,
11183                columns,
11184                defaults,
11185            },
11186        });
11187    }
11188    for (child_table, entries) in setnull_plan {
11189        let (positions, columns): (Vec<usize>, Vec<usize>) = entries.into_iter().unzip();
11190        steps.push(FkChildStep {
11191            child_table,
11192            action: FkChildAction::SetNull { positions, columns },
11193        });
11194    }
11195    for (child_table, entries) in setdefault_plan {
11196        let mut positions = Vec::with_capacity(entries.len());
11197        let mut columns = Vec::with_capacity(entries.len());
11198        let mut defaults = Vec::with_capacity(entries.len());
11199        for ((p, c), v) in entries {
11200            positions.push(p);
11201            columns.push(c);
11202            defaults.push(v);
11203        }
11204        steps.push(FkChildStep {
11205            child_table,
11206            action: FkChildAction::SetDefault {
11207                positions,
11208                columns,
11209                defaults,
11210            },
11211        });
11212    }
11213    let _ = delete_plan; // UPDATE never deletes children.
11214    Ok(steps)
11215}
11216
11217/// v7.6.5 — apply one FK child step to the catalog. Encapsulates
11218/// the three action variants so the DELETE executor stays a
11219/// simple loop over the planned steps.
11220fn apply_fk_child_step(catalog: &mut Catalog, step: &FkChildStep) -> Result<(), EngineError> {
11221    let child = catalog.get_mut(&step.child_table).ok_or_else(|| {
11222        EngineError::Storage(StorageError::TableNotFound {
11223            name: step.child_table.clone(),
11224        })
11225    })?;
11226    match &step.action {
11227        FkChildAction::Delete { positions } => {
11228            let _ = child.delete_rows(positions);
11229        }
11230        FkChildAction::SetNull { positions, columns } => {
11231            apply_per_cell_writes(child, positions, columns, |_| Value::Null)?;
11232        }
11233        FkChildAction::SetDefault {
11234            positions,
11235            columns,
11236            defaults,
11237        } => {
11238            apply_per_cell_writes(child, positions, columns, |i| defaults[i].clone())?;
11239        }
11240    }
11241    Ok(())
11242}
11243
11244/// v7.6.5 — write new values into selected child cells via
11245/// `Table::update_row` (the catalog's existing UPDATE entry).
11246/// Groups writes by row position so multi-column updates on the
11247/// same row only call `update_row` once. `value_for(i)` produces
11248/// the new value for the i-th (position, column) entry.
11249fn apply_per_cell_writes(
11250    child: &mut spg_storage::Table,
11251    positions: &[usize],
11252    columns: &[usize],
11253    mut value_for: impl FnMut(usize) -> Value,
11254) -> Result<(), EngineError> {
11255    use alloc::collections::BTreeMap;
11256    let mut by_row: BTreeMap<usize, Vec<(usize, Value)>> = BTreeMap::new();
11257    for i in 0..positions.len() {
11258        by_row
11259            .entry(positions[i])
11260            .or_default()
11261            .push((columns[i], value_for(i)));
11262    }
11263    for (pos, mutations) in by_row {
11264        let mut new_values = child.rows()[pos].values.clone();
11265        for (col, v) in mutations {
11266            if let Some(slot) = new_values.get_mut(col) {
11267                *slot = v;
11268            }
11269        }
11270        child
11271            .update_row(pos, new_values)
11272            .map_err(EngineError::Storage)?;
11273    }
11274    Ok(())
11275}
11276
11277fn fk_action_sql_to_storage(a: spg_sql::ast::FkAction) -> spg_storage::FkAction {
11278    match a {
11279        spg_sql::ast::FkAction::Restrict => spg_storage::FkAction::Restrict,
11280        spg_sql::ast::FkAction::Cascade => spg_storage::FkAction::Cascade,
11281        spg_sql::ast::FkAction::SetNull => spg_storage::FkAction::SetNull,
11282        spg_sql::ast::FkAction::SetDefault => spg_storage::FkAction::SetDefault,
11283        spg_sql::ast::FkAction::NoAction => spg_storage::FkAction::NoAction,
11284    }
11285}
11286
11287/// v7.9.21 — resolve a column's DEFAULT for INSERT-time
11288/// default-fill. Free fn (rather than `&self`) so callers
11289/// with an active `&mut Table` borrow can still use it.
11290/// Literal defaults take the cached path (`col.default`);
11291/// runtime defaults hit `clock_fn` at each call. mailrs G4.
11292fn resolve_column_default_free(
11293    col: &ColumnSchema,
11294    clock_fn: Option<ClockFn>,
11295) -> Result<Value, EngineError> {
11296    if let Some(rt) = &col.runtime_default {
11297        return eval_runtime_default_free(rt, col.ty, clock_fn);
11298    }
11299    Ok(col.default.clone().unwrap_or(Value::Null))
11300}
11301
11302fn eval_runtime_default_free(
11303    rt: &str,
11304    ty: DataType,
11305    clock_fn: Option<ClockFn>,
11306) -> Result<Value, EngineError> {
11307    let s = rt.trim().to_ascii_lowercase();
11308    let canonical = s.trim_end_matches("()");
11309    let now_us = match clock_fn {
11310        Some(f) => f(),
11311        None => 0,
11312    };
11313    let v = match canonical {
11314        "now" | "current_timestamp" | "localtimestamp" => Value::Timestamp(now_us),
11315        "current_date" => Value::Date((now_us / 86_400_000_000) as i32),
11316        "current_time" | "localtime" => Value::Timestamp(now_us),
11317        other => {
11318            return Err(EngineError::Unsupported(alloc::format!(
11319                "runtime DEFAULT expression {other:?} not supported \
11320                 (v7.9.21 whitelist: now() / current_timestamp / \
11321                 current_date / current_time / localtimestamp / \
11322                 localtime)"
11323            )));
11324        }
11325    };
11326    coerce_value(v, ty, "DEFAULT", 0)
11327}
11328
11329/// v7.9.21 — true when a DEFAULT expression needs INSERT-time
11330/// evaluation rather than being cacheable as a literal Value.
11331/// FunctionCall is the immediate case (`now()`,
11332/// `current_timestamp`). Literal expressions and simple sign-
11333/// flipped numerics still take the static-cache path.
11334fn is_runtime_default_expr(expr: &Expr) -> bool {
11335    match expr {
11336        Expr::FunctionCall { .. } => true,
11337        Expr::Unary { expr, .. } => is_runtime_default_expr(expr),
11338        _ => false,
11339    }
11340}
11341
11342fn column_def_to_schema(c: ColumnDef) -> Result<ColumnSchema, EngineError> {
11343    let ty = column_type_to_data_type(c.ty);
11344    let mut schema = ColumnSchema::new(c.name.clone(), ty, c.nullable);
11345    if let Some(default_expr) = c.default {
11346        // v7.9.21 — distinguish literal defaults (evaluated once
11347        // at CREATE TABLE) from expression defaults (deferred to
11348        // INSERT). Function calls (`now()`, `current_timestamp`
11349        // — see v7.9.20 keyword promotion) take the runtime path.
11350        // Literals continue to cache. mailrs G4.
11351        if is_runtime_default_expr(&default_expr) {
11352            let display = alloc::format!("{default_expr}");
11353            schema = schema.with_runtime_default(display);
11354        } else {
11355            let raw = literal_expr_to_value(default_expr)?;
11356            let coerced = coerce_value(raw, ty, &c.name, 0)?;
11357            schema = schema.with_default(coerced);
11358        }
11359    }
11360    if c.auto_increment {
11361        // AUTO_INCREMENT only makes sense on integer-shaped columns.
11362        if !matches!(ty, DataType::SmallInt | DataType::Int | DataType::BigInt) {
11363            return Err(EngineError::Unsupported(alloc::format!(
11364                "AUTO_INCREMENT requires an integer column type, got {ty:?}"
11365            )));
11366        }
11367        schema = schema.with_auto_increment();
11368    }
11369    Ok(schema)
11370}
11371
/// v7.10.4 — decode a BYTEA literal into raw bytes.
///
/// The input is trimmed first, then interpreted as one of:
///   * hex form: `\xDEADBEEF` (prefix `\x` or `\X`, hex digits in either
///     case, embedded whitespace ignored);
///   * escape form: `Hello\000world` — `\\` yields one backslash and
///     `\NNN` (three octal digits, at most `\377`) yields that byte;
///   * anything else: the raw UTF-8 bytes of the input (PG accepts this
///     too).
///
/// A backslash that does not start one of the escapes above is kept as a
/// literal byte. That includes sequences such as `\189`, whose digits are
/// not octal and must not be folded into a byte value.
///
/// # Errors
/// Only the hex form can fail: `"odd-length hex literal"` or
/// `"invalid hex digit"`.
fn decode_bytea_literal(s: &str) -> Result<alloc::vec::Vec<u8>, &'static str> {
    let s = s.trim();
    if let Some(hex) = s.strip_prefix("\\x").or_else(|| s.strip_prefix("\\X")) {
        // Hex form. Each pair of hex digits → one byte.
        let cleaned: alloc::string::String = hex.chars().filter(|c| !c.is_whitespace()).collect();
        if cleaned.len() % 2 != 0 {
            return Err("odd-length hex literal");
        }
        let mut out = alloc::vec::Vec::with_capacity(cleaned.len() / 2);
        for pair in cleaned.as_bytes().chunks_exact(2) {
            out.push((hex_nibble(pair[0])? << 4) | hex_nibble(pair[1])?);
        }
        return Ok(out);
    }

    // Escape form or raw input, scanned byte-by-byte.
    let bytes = s.as_bytes();
    let mut out = alloc::vec::Vec::with_capacity(bytes.len());
    let mut i = 0;
    while i < bytes.len() {
        match &bytes[i..] {
            // `\\` → a single backslash.
            [b'\\', b'\\', ..] => {
                out.push(b'\\');
                i += 2;
            }
            // `\NNN` → one byte. Limiting the first digit to 0..=3 keeps
            // the value within 0..=0xFF.
            [b'\\', d2 @ b'0'..=b'3', d1 @ b'0'..=b'7', d0 @ b'0'..=b'7', ..] => {
                out.push(((*d2 - b'0') << 6) | ((*d1 - b'0') << 3) | (*d0 - b'0'));
                i += 4;
            }
            // Any other byte (including a stray backslash) is literal.
            [other, ..] => {
                out.push(*other);
                i += 1;
            }
            // Not reachable while `i < bytes.len()`; needed for exhaustiveness.
            [] => break,
        }
    }
    Ok(out)
}

/// Convert one ASCII hex digit (either case) to its value 0..=15.
///
/// # Errors
/// `"invalid hex digit"` for any other byte.
fn hex_nibble(b: u8) -> Result<u8, &'static str> {
    match b {
        b'0'..=b'9' => Ok(b - b'0'),
        b'a'..=b'f' => Ok(b - b'a' + 10),
        b'A'..=b'F' => Ok(b - b'A' + 10),
        _ => Err("invalid hex digit"),
    }
}
11435
// v7.10.11 — NOTE: this paragraph describes `decode_text_array_literal`
// (defined after `array_literal_widen`), not the function directly below.
// It decodes the PG TEXT[] external array form (`{a,b,NULL}` with optional
// double-quoted elements): the leading/trailing `{`/`}` are stripped and
// the body is split at commas. Quoted elements (`"hello, world"`) preserve
// embedded commas; `\\` and `\"` decode to a literal backslash / quote.
// A plain unquoted `NULL` (case-insensitive) maps to `None`.
11442/// v7.11.13 — pick the array type for `ARRAY[lit, …]` from the
11443/// element values. Single-element-type rules:
11444///   - all NULL / all Text → TextArray
11445///   - all Int (or Int+NULL) → IntArray
11446///   - any BigInt without Text → BigIntArray (widening)
11447///   - any Text → TextArray (fallback; non-string elements
11448///     render as text)
11449fn array_literal_widen(items: alloc::vec::Vec<Value>) -> Value {
11450    let mut has_text = false;
11451    let mut has_bigint = false;
11452    let mut has_int = false;
11453    for v in &items {
11454        match v {
11455            Value::Null => {}
11456            Value::Text(_) | Value::Json(_) => has_text = true,
11457            Value::BigInt(_) => has_bigint = true,
11458            Value::Int(_) | Value::SmallInt(_) => has_int = true,
11459            _ => has_text = true,
11460        }
11461    }
11462    if has_text || (!has_bigint && !has_int) {
11463        let out: alloc::vec::Vec<Option<alloc::string::String>> = items
11464            .into_iter()
11465            .map(|v| match v {
11466                Value::Null => None,
11467                Value::Text(s) | Value::Json(s) => Some(s),
11468                other => Some(alloc::format!("{other:?}")),
11469            })
11470            .collect();
11471        return Value::TextArray(out);
11472    }
11473    if has_bigint {
11474        let out: alloc::vec::Vec<Option<i64>> = items
11475            .into_iter()
11476            .map(|v| match v {
11477                Value::Null => None,
11478                Value::Int(n) => Some(i64::from(n)),
11479                Value::SmallInt(n) => Some(i64::from(n)),
11480                Value::BigInt(n) => Some(n),
11481                _ => unreachable!("widen: unexpected non-integer in BigInt path"),
11482            })
11483            .collect();
11484        return Value::BigIntArray(out);
11485    }
11486    let out: alloc::vec::Vec<Option<i32>> = items
11487        .into_iter()
11488        .map(|v| match v {
11489            Value::Null => None,
11490            Value::Int(n) => Some(n),
11491            Value::SmallInt(n) => Some(i32::from(n)),
11492            _ => unreachable!("widen: unexpected non-i32-compatible in Int path"),
11493        })
11494        .collect();
11495    Value::IntArray(out)
11496}
11497
/// v7.10.11 — decode the PG TEXT[] external array form, e.g.
/// `{a,b,NULL}` or `{"hello, world",plain}`.
///
/// The (trimmed) input must be wrapped in `{`…`}`; the body is split at
/// commas. Rules per element:
///   * double-quoted: content is taken verbatim, a backslash makes the
///     following byte literal (so `\\` → `\` and `\"` → `"`), and embedded
///     commas are preserved;
///   * unquoted: surrounding whitespace is trimmed, and the token `NULL`
///     (case-insensitive) maps to `None`.
///
/// An empty or whitespace-only body yields an empty vector.
///
/// # Errors
/// A static message when the braces are missing, a quoted element is not
/// terminated, or something other than `,` follows an element.
fn decode_text_array_literal(
    s: &str,
) -> Result<alloc::vec::Vec<Option<alloc::string::String>>, &'static str> {
    let trimmed = s.trim();
    let inner = trimmed
        .strip_prefix('{')
        .and_then(|x| x.strip_suffix('}'))
        .ok_or("TEXT[] literal must be enclosed in '{...}'")?;
    let mut out: alloc::vec::Vec<Option<alloc::string::String>> = alloc::vec::Vec::new();
    if inner.trim().is_empty() {
        return Ok(out);
    }
    let bytes = inner.as_bytes();
    let mut i = 0;
    // `<=` so that a trailing comma still produces a final (empty) element.
    while i <= bytes.len() {
        // Skip leading whitespace.
        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
            i += 1;
        }
        if i < bytes.len() && bytes[i] == b'"' {
            // Quoted element. Collect raw bytes and convert once at the
            // end: pushing `byte as char` would mangle multi-byte UTF-8.
            i += 1; // open quote
            let mut buf: alloc::vec::Vec<u8> = alloc::vec::Vec::new();
            while i < bytes.len() && bytes[i] != b'"' {
                if bytes[i] == b'\\' && i + 1 < bytes.len() {
                    i += 1; // drop the escape; the next byte is literal
                }
                buf.push(bytes[i]);
                i += 1;
            }
            if i >= bytes.len() {
                return Err("unterminated quoted element");
            }
            i += 1; // close quote
            // Only ASCII bytes are ever dropped or used as delimiters, so
            // the buffer stays valid UTF-8; the error arm is defensive.
            let text = alloc::string::String::from_utf8(buf)
                .map_err(|_| "invalid UTF-8 in quoted TEXT[] element")?;
            out.push(Some(text));
        } else {
            // Unquoted element — read until next comma or end.
            let start = i;
            while i < bytes.len() && bytes[i] != b',' {
                i += 1;
            }
            let raw = inner[start..i].trim();
            if raw.eq_ignore_ascii_case("NULL") {
                out.push(None);
            } else {
                out.push(Some(alloc::string::String::from(raw)));
            }
        }
        // Skip whitespace, expect comma or end.
        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
            i += 1;
        }
        if i >= bytes.len() {
            break;
        }
        if bytes[i] != b',' {
            return Err("expected ',' between TEXT[] elements");
        }
        i += 1;
    }
    Ok(out)
}
11562
/// v7.10.11 — encode a TEXT[] back into the PG external array form.
///
/// NULL elements become the bare token `NULL`. An element is wrapped in
/// double quotes (with `"` and `\` backslash-escaped) when it is empty,
/// spells `NULL` in any case, or contains a comma, brace, quote, backslash
/// or any whitespace character. Whitespace must always be quoted because
/// unquoted elements are trimmed when the literal is decoded again, so an
/// unquoted `"a\n"` would not round-trip.
fn encode_text_array(items: &[Option<alloc::string::String>]) -> alloc::string::String {
    let mut out = alloc::string::String::with_capacity(2 + items.len() * 8);
    out.push('{');
    for (i, item) in items.iter().enumerate() {
        if i > 0 {
            out.push(',');
        }
        match item {
            None => out.push_str("NULL"),
            Some(s) => {
                let needs_quote = s.is_empty()
                    || s.eq_ignore_ascii_case("NULL")
                    || s.chars().any(|c| {
                        c.is_whitespace() || matches!(c, ',' | '{' | '}' | '"' | '\\')
                    });
                if needs_quote {
                    out.push('"');
                    for c in s.chars() {
                        if c == '"' || c == '\\' {
                            out.push('\\');
                        }
                        out.push(c);
                    }
                    out.push('"');
                } else {
                    out.push_str(s);
                }
            }
        }
    }
    out.push('}');
    out
}
11599
/// v7.10.4 — render BYTEA bytes in PG hex output format: a `\x` prefix
/// followed by two lowercase hex digits per byte. Used by the Text-side
/// round-trip and the wire layer's text-mode encoder.
fn encode_bytea_hex(b: &[u8]) -> alloc::string::String {
    let mut hex = alloc::string::String::with_capacity(2 + 2 * b.len());
    hex.push_str("\\x");
    for &byte in b {
        // High nibble first, then low nibble.
        for nibble in [byte >> 4, byte & 0x0F] {
            hex.push(hex_digit(nibble));
        }
    }
    hex
}

/// Lowercase hex digit for a nibble value; anything above 15 yields `'?'`.
const fn hex_digit(n: u8) -> char {
    if n < 10 {
        (b'0' + n) as char
    } else if n < 16 {
        (b'a' + (n - 10)) as char
    } else {
        '?'
    }
}
11622
11623const fn column_type_to_data_type(t: ColumnTypeName) -> DataType {
11624    match t {
11625        ColumnTypeName::SmallInt => DataType::SmallInt,
11626        ColumnTypeName::Int => DataType::Int,
11627        ColumnTypeName::BigInt => DataType::BigInt,
11628        ColumnTypeName::Float => DataType::Float,
11629        ColumnTypeName::Text => DataType::Text,
11630        ColumnTypeName::Varchar(n) => DataType::Varchar(n),
11631        ColumnTypeName::Char(n) => DataType::Char(n),
11632        ColumnTypeName::Bool => DataType::Bool,
11633        ColumnTypeName::Vector { dim, encoding } => DataType::Vector {
11634            dim,
11635            encoding: match encoding {
11636                SqlVecEncoding::F32 => VecEncoding::F32,
11637                SqlVecEncoding::Sq8 => VecEncoding::Sq8,
11638                SqlVecEncoding::F16 => VecEncoding::F16,
11639            },
11640        },
11641        ColumnTypeName::Numeric(precision, scale) => DataType::Numeric { precision, scale },
11642        ColumnTypeName::Date => DataType::Date,
11643        ColumnTypeName::Timestamp => DataType::Timestamp,
11644        ColumnTypeName::Timestamptz => DataType::Timestamptz,
11645        ColumnTypeName::Json => DataType::Json,
11646        ColumnTypeName::Jsonb => DataType::Jsonb,
11647        ColumnTypeName::Bytes => DataType::Bytes,
11648        ColumnTypeName::TextArray => DataType::TextArray,
11649        ColumnTypeName::IntArray => DataType::IntArray,
11650        ColumnTypeName::BigIntArray => DataType::BigIntArray,
11651        ColumnTypeName::TsVector => DataType::TsVector,
11652        ColumnTypeName::TsQuery => DataType::TsQuery,
11653    }
11654}
11655
11656/// Convert an INSERT VALUES expression to a storage Value. Supports literal
11657/// expressions, unary-minus over numeric literals, and pgvector-style
11658/// `'[..]'::vector` cast (v1.2). Anything more complex returns `Unsupported`.
11659fn literal_expr_to_value(expr: Expr) -> Result<Value, EngineError> {
11660    match expr {
11661        Expr::Literal(l) => Ok(literal_to_value(l)),
11662        Expr::Cast { expr, target } => {
11663            let inner_value = literal_expr_to_value(*expr)?;
11664            crate::eval::cast_value(inner_value, target).map_err(EngineError::Eval)
11665        }
11666        Expr::Unary {
11667            op: UnOp::Neg,
11668            expr,
11669        } => match *expr {
11670            Expr::Literal(Literal::Integer(n)) => {
11671                // Fold to i32 if it fits, else BigInt. Parser emits Integer(i64)
11672                // — overflow on negate of i64::MIN is the one edge case.
11673                let neg = n.checked_neg().ok_or_else(|| {
11674                    EngineError::Unsupported("integer literal overflow on negation".into())
11675                })?;
11676                Ok(int_value_for(neg))
11677            }
11678            Expr::Literal(Literal::Float(x)) => Ok(Value::Float(-x)),
11679            other => Err(EngineError::Unsupported(alloc::format!(
11680                "unary minus over non-literal expression: {other:?}"
11681            ))),
11682        },
11683        // v7.10.10 — `ARRAY[lit, lit, …]` constructor accepted at
11684        // INSERT-time. Each element must reduce to a Value through
11685        // `literal_expr_to_value`; NULL elements become `None`.
11686        // v7.11.13 — deduce shape from element values: all Int →
11687        // IntArray; any BigInt → BigIntArray (widening); any Text
11688        // → TextArray. Cast targets (`ARRAY[]::INT[]`) flow through
11689        // the outer Cast arm before reaching here and re-coerce.
11690        Expr::Array(items) => {
11691            let mut materialised: alloc::vec::Vec<Value> =
11692                alloc::vec::Vec::with_capacity(items.len());
11693            for elem in items {
11694                materialised.push(literal_expr_to_value(elem)?);
11695            }
11696            Ok(array_literal_widen(materialised))
11697        }
11698        other => Err(EngineError::Unsupported(alloc::format!(
11699            "non-literal INSERT value expression: {other:?}"
11700        ))),
11701    }
11702}
11703
11704fn literal_to_value(l: Literal) -> Value {
11705    match l {
11706        Literal::Integer(n) => int_value_for(n),
11707        Literal::Float(x) => Value::Float(x),
11708        Literal::String(s) => Value::Text(s),
11709        Literal::Bool(b) => Value::Bool(b),
11710        Literal::Null => Value::Null,
11711        Literal::Vector(v) => Value::Vector(v),
11712        Literal::Interval { months, micros, .. } => Value::Interval { months, micros },
11713    }
11714}
11715
11716/// Pick `Int` (`i32`) when the literal fits, else `BigInt`. `INT` vs `BIGINT`
11717/// columns will still enforce the right tag downstream — this is just the
11718/// default we synthesise from an unannotated integer literal.
11719fn int_value_for(n: i64) -> Value {
11720    if let Ok(small) = i32::try_from(n) {
11721        Value::Int(small)
11722    } else {
11723        Value::BigInt(n)
11724    }
11725}
11726
11727/// Widen / narrow `v` to fit `expected`. Numerics permit safe widening
11728/// (`Int → BigInt`, `Int/BigInt → Float`) and best-effort narrowing
11729/// (`BigInt → Int` succeeds only when the value fits in `i32`). Everything
11730/// else returns `TypeMismatch` carrying the column name for caller diagnostics.
11731/// `NULL` is always permitted; the nullability check happens later in storage.
11732#[allow(clippy::too_many_lines)]
11733fn coerce_value(
11734    v: Value,
11735    expected: DataType,
11736    col_name: &str,
11737    position: usize,
11738) -> Result<Value, EngineError> {
11739    if v.is_null() {
11740        return Ok(Value::Null);
11741    }
11742    let actual = v.data_type().expect("non-null");
11743    if actual == expected {
11744        return Ok(v);
11745    }
11746    let coerced = match (v, expected) {
11747        (Value::Int(n), DataType::BigInt) => Some(Value::BigInt(i64::from(n))),
11748        (Value::Int(n), DataType::Float) => Some(Value::Float(f64::from(n))),
11749        (Value::Int(n), DataType::SmallInt) => i16::try_from(n).ok().map(Value::SmallInt),
11750        (Value::Int(n), DataType::Numeric { precision, scale }) => Some(numeric_from_integer(
11751            i128::from(n),
11752            precision,
11753            scale,
11754            col_name,
11755        )?),
11756        (Value::SmallInt(n), DataType::Int) => Some(Value::Int(i32::from(n))),
11757        (Value::SmallInt(n), DataType::BigInt) => Some(Value::BigInt(i64::from(n))),
11758        (Value::SmallInt(n), DataType::Float) => Some(Value::Float(f64::from(n))),
11759        (Value::SmallInt(n), DataType::Numeric { precision, scale }) => Some(numeric_from_integer(
11760            i128::from(n),
11761            precision,
11762            scale,
11763            col_name,
11764        )?),
11765        (Value::BigInt(n), DataType::Int) => i32::try_from(n).ok().map(Value::Int),
11766        (Value::BigInt(n), DataType::SmallInt) => i16::try_from(n).ok().map(Value::SmallInt),
11767        #[allow(clippy::cast_precision_loss)]
11768        (Value::BigInt(n), DataType::Float) => Some(Value::Float(n as f64)),
11769        (Value::BigInt(n), DataType::Numeric { precision, scale }) => Some(numeric_from_integer(
11770            i128::from(n),
11771            precision,
11772            scale,
11773            col_name,
11774        )?),
11775        (Value::Float(x), DataType::Numeric { precision, scale }) => {
11776            Some(numeric_from_float(x, precision, scale, col_name)?)
11777        }
11778        // Text → DATE / TIMESTAMP: parse canonical text forms.
11779        (Value::Text(s), DataType::Date) => {
11780            let d = eval::parse_date_literal(&s).ok_or_else(|| {
11781                EngineError::Eval(EvalError::TypeMismatch {
11782                    detail: alloc::format!("cannot parse {s:?} as DATE for column `{col_name}`"),
11783                })
11784            })?;
11785            Some(Value::Date(d))
11786        }
11787        // v7.14.0 — MySQL DEFAULT clauses quote integer / float
11788        // / boolean literals (`DEFAULT '0'`, `DEFAULT '1'`,
11789        // `DEFAULT '3.14'`, `DEFAULT 'true'`). Coerce the text
11790        // form to the column's numeric / bool type at DEFAULT-
11791        // installation time so the storage check sees a typed
11792        // value. Parse failures fall through to TypeMismatch.
11793        (Value::Text(s), DataType::SmallInt) => s.parse::<i16>().ok().map(Value::SmallInt),
11794        (Value::Text(s), DataType::Int) => s.parse::<i32>().ok().map(Value::Int),
11795        (Value::Text(s), DataType::BigInt) => s.parse::<i64>().ok().map(Value::BigInt),
11796        (Value::Text(s), DataType::Float) => s.parse::<f64>().ok().map(Value::Float),
11797        (Value::Text(s), DataType::Bool) => match s.to_ascii_lowercase().as_str() {
11798            "0" | "false" | "f" | "no" | "off" => Some(Value::Bool(false)),
11799            "1" | "true" | "t" | "yes" | "on" => Some(Value::Bool(true)),
11800            _ => None,
11801        },
11802        // v4.9: Text ↔ JSON coercion. No structural validation —
11803        // any text literal is accepted; the responsibility for
11804        // valid JSON lies with the producer.
11805        (Value::Text(s), DataType::Json | DataType::Jsonb) => Some(Value::Json(s)),
11806        (Value::Json(s), DataType::Text) => Some(Value::Text(s)),
11807        // v7.13.3 — mailrs round-7 S10. SPG's storage represents
11808        // both JSON and JSONB on-disk as `Value::Json(String)` —
11809        // they share the underlying text payload. The cast
11810        // `'<text>'::jsonb` produces a Value::Json that needs to
11811        // satisfy a DataType::Jsonb column. Identity coerce in
11812        // both directions so JSON ↔ JSONB assignments work at all
11813        // INSERT / ALTER COLUMN TYPE / DEFAULT contexts.
11814        (Value::Json(s), DataType::Jsonb | DataType::Json) => Some(Value::Json(s)),
11815        // v7.10.4 — Text → BYTEA. Decode PG-style literal forms:
11816        //   - Hex:    `\x48656c6c6f`  (case-insensitive hex pairs)
11817        //   - Escape: `Hello\\000world`  (backslash + octal triples)
11818        //   - Plain:  any string → raw UTF-8 bytes (PG also accepts)
11819        // Errors surface as TypeMismatch so the operator gets a
11820        // clear "this literal isn't a bytea literal" hint.
11821        (Value::Text(s), DataType::Bytes) => {
11822            let bytes = decode_bytea_literal(&s).map_err(|e| {
11823                EngineError::Eval(EvalError::TypeMismatch {
11824                    detail: alloc::format!(
11825                        "cannot parse {s:?} as BYTEA for column `{col_name}`: {e}"
11826                    ),
11827                })
11828            })?;
11829            Some(Value::Bytes(bytes))
11830        }
11831        // v7.10.4 — BYTEA → Text round-trip uses the PG hex
11832        // output (lowercase, `\x` prefix). Important when a
11833        // SELECT pulls a bytea cell through a Text column path.
11834        (Value::Bytes(b), DataType::Text) => Some(Value::Text(encode_bytea_hex(&b))),
11835        // v7.10.11 — Text → TEXT[]. Decode PG's external array
11836        // form `'{a,b,NULL}'`. NULL element token (case-insensitive)
11837        // is the literal `NULL`; everything else is a quoted or
11838        // unquoted text element. mailrs `'{label1,label2}'::TEXT[]`.
11839        (Value::Text(s), DataType::TextArray) => {
11840            let arr = decode_text_array_literal(&s).map_err(|e| {
11841                EngineError::Eval(EvalError::TypeMismatch {
11842                    detail: alloc::format!(
11843                        "cannot parse {s:?} as TEXT[] for column `{col_name}`: {e}"
11844                    ),
11845                })
11846            })?;
11847            Some(Value::TextArray(arr))
11848        }
11849        // v7.16.0 — Text → IntArray / BigIntArray for the
11850        // spg-sqlx Bind path. Decode the PG external form
11851        // `{1,2,3}` as a TEXT array first, then parse each
11852        // element as int. Same shape as the TextArray decode
11853        // above with an element-wise narrow.
11854        (Value::Text(s), DataType::IntArray) => {
11855            let arr = decode_text_array_literal(&s).map_err(|e| {
11856                EngineError::Eval(EvalError::TypeMismatch {
11857                    detail: alloc::format!(
11858                        "cannot parse {s:?} as INT[] for column `{col_name}`: {e}"
11859                    ),
11860                })
11861            })?;
11862            let mut out: Vec<Option<i32>> = Vec::with_capacity(arr.len());
11863            for elem in arr {
11864                match elem {
11865                    None => out.push(None),
11866                    Some(t) => {
11867                        let n: i32 = t.parse().map_err(|_| {
11868                            EngineError::Eval(EvalError::TypeMismatch {
11869                                detail: alloc::format!(
11870                                    "cannot parse {t:?} as INT element for `{col_name}`"
11871                                ),
11872                            })
11873                        })?;
11874                        out.push(Some(n));
11875                    }
11876                }
11877            }
11878            Some(Value::IntArray(out))
11879        }
11880        (Value::Text(s), DataType::BigIntArray) => {
11881            let arr = decode_text_array_literal(&s).map_err(|e| {
11882                EngineError::Eval(EvalError::TypeMismatch {
11883                    detail: alloc::format!(
11884                        "cannot parse {s:?} as BIGINT[] for column `{col_name}`: {e}"
11885                    ),
11886                })
11887            })?;
11888            let mut out: Vec<Option<i64>> = Vec::with_capacity(arr.len());
11889            for elem in arr {
11890                match elem {
11891                    None => out.push(None),
11892                    Some(t) => {
11893                        let n: i64 = t.parse().map_err(|_| {
11894                            EngineError::Eval(EvalError::TypeMismatch {
11895                                detail: alloc::format!(
11896                                    "cannot parse {t:?} as BIGINT element for `{col_name}`"
11897                                ),
11898                            })
11899                        })?;
11900                        out.push(Some(n));
11901                    }
11902                }
11903            }
11904            Some(Value::BigIntArray(out))
11905        }
11906        // v7.10.11 — TEXT[] → Text round-trip uses PG's
11907        // external array form (`{a,b,NULL}`). Lets a SELECT
11908        // pull an array column through any Text-side codepath.
11909        (Value::TextArray(items), DataType::Text) => Some(Value::Text(encode_text_array(&items))),
11910        // v7.16.1 — Text → TSVECTOR auto-coerce for the
11911        // INSERT-side wire path (mailrs round-9 A.2.a). PG
11912        // implicitly promotes the TEXT literal at INSERT into a
11913        // TSVECTOR column; SPG previously rejected with a hard
11914        // type mismatch, blocking 23,276 pg_dump rows into
11915        // `messages.search_vector`. We route through the same
11916        // `decode_tsvector_external` the `::tsvector` cast
11917        // already uses, so PG-canonical forms (`'word'`,
11918        // `'word:1A,2B'`, multi-lexeme, empty `''`) all parse.
11919        (Value::Text(s), DataType::TsVector) => {
11920            let lexs = eval::decode_tsvector_external(&s).map_err(|e| {
11921                EngineError::Eval(EvalError::TypeMismatch {
11922                    detail: alloc::format!(
11923                        "cannot parse {s:?} as TSVECTOR for column `{col_name}`: {e}"
11924                    ),
11925                })
11926            })?;
11927            Some(Value::TsVector(lexs))
11928        }
11929        (Value::Text(s), DataType::Timestamp | DataType::Timestamptz) => {
11930            let t = eval::parse_timestamp_literal(&s).ok_or_else(|| {
11931                EngineError::Eval(EvalError::TypeMismatch {
11932                    detail: alloc::format!(
11933                        "cannot parse {s:?} as TIMESTAMP for column `{col_name}`"
11934                    ),
11935                })
11936            })?;
11937            Some(Value::Timestamp(t))
11938        }
11939        // DATE ↔ TIMESTAMP convertibility (DATE → midnight,
11940        // TIMESTAMP → day truncation).
11941        (Value::Date(d), DataType::Timestamp | DataType::Timestamptz) => {
11942            Some(Value::Timestamp(i64::from(d) * 86_400_000_000))
11943        }
11944        // v7.9.21 — Value::Timestamp lands in either Timestamp
11945        // or Timestamptz columns; the on-disk layout is the
11946        // same i64 microseconds UTC.
11947        (Value::Timestamp(t), DataType::Timestamptz) => Some(Value::Timestamp(t)),
11948        (Value::Timestamp(t), DataType::Date) => {
11949            let days = t.div_euclid(86_400_000_000);
11950            i32::try_from(days).ok().map(Value::Date)
11951        }
11952        (
11953            Value::Numeric {
11954                scaled,
11955                scale: src_scale,
11956            },
11957            DataType::Numeric { precision, scale },
11958        ) => Some(numeric_rescale(
11959            scaled, src_scale, precision, scale, col_name,
11960        )?),
11961        #[allow(clippy::cast_precision_loss)]
11962        (Value::Numeric { scaled, scale }, DataType::Float) => {
11963            let mut div = 1.0_f64;
11964            for _ in 0..scale {
11965                div *= 10.0;
11966            }
11967            Some(Value::Float((scaled as f64) / div))
11968        }
11969        (Value::Numeric { scaled, scale }, DataType::Int) => {
11970            let truncated = numeric_truncate_to_integer(scaled, scale);
11971            i32::try_from(truncated).ok().map(Value::Int)
11972        }
11973        (Value::Numeric { scaled, scale }, DataType::BigInt) => {
11974            let truncated = numeric_truncate_to_integer(scaled, scale);
11975            i64::try_from(truncated).ok().map(Value::BigInt)
11976        }
11977        (Value::Numeric { scaled, scale }, DataType::SmallInt) => {
11978            let truncated = numeric_truncate_to_integer(scaled, scale);
11979            i16::try_from(truncated).ok().map(Value::SmallInt)
11980        }
11981        // VARCHAR(n) enforces an upper bound on character count.
11982        (Value::Text(s), DataType::Varchar(max)) => {
11983            if u32::try_from(s.chars().count()).unwrap_or(u32::MAX) <= max {
11984                Some(Value::Text(s))
11985            } else {
11986                return Err(EngineError::Unsupported(alloc::format!(
11987                    "value for VARCHAR({max}) column `{col_name}` exceeds length: \
11988                     {} chars",
11989                    s.chars().count()
11990                )));
11991            }
11992        }
11993        // v6.0.1: f32 → SQ8 INSERT-time quantisation. Triggered
11994        // when the column declares `VECTOR(N) USING SQ8` and
11995        // the INSERT VALUES expression yields a raw f32 vector
11996        // (the normal pgvector-shape literal). Dim mismatch
11997        // falls through the `_ => None` arm and surfaces as
11998        // `TypeMismatch` with the expected SQ8 column type —
11999        // matching the F32 path's existing error.
12000        (
12001            Value::Vector(v),
12002            DataType::Vector {
12003                dim,
12004                encoding: VecEncoding::Sq8,
12005            },
12006        ) if v.len() == dim as usize => Some(Value::Sq8Vector(spg_storage::quantize::quantize(&v))),
12007        // v6.0.3: f32 → f16 INSERT-time conversion for HALF
12008        // columns. Bit-exact at the storage layer (modulo
12009        // half-precision rounding); no rerank pass needed at
12010        // search time.
12011        (
12012            Value::Vector(v),
12013            DataType::Vector {
12014                dim,
12015                encoding: VecEncoding::F16,
12016            },
12017        ) if v.len() == dim as usize => Some(Value::HalfVector(
12018            spg_storage::halfvec::HalfVector::from_f32_slice(&v),
12019        )),
12020        // CHAR(n) right-pads with U+0020 to exactly n chars; if the input
12021        // is already longer we reject (PG truncates trailing-space-only;
12022        // staying strict for v1).
12023        (Value::Text(s), DataType::Char(size)) => {
12024            let len = u32::try_from(s.chars().count()).unwrap_or(u32::MAX);
12025            if len > size {
12026                return Err(EngineError::Unsupported(alloc::format!(
12027                    "value for CHAR({size}) column `{col_name}` exceeds length: \
12028                     {len} chars"
12029                )));
12030            }
12031            let need = (size - len) as usize;
12032            let mut padded = s;
12033            padded.reserve(need);
12034            for _ in 0..need {
12035                padded.push(' ');
12036            }
12037            Some(Value::Text(padded))
12038        }
12039        _ => None,
12040    };
12041    coerced.ok_or(EngineError::Storage(StorageError::TypeMismatch {
12042        column: col_name.into(),
12043        expected,
12044        actual,
12045        position,
12046    }))
12047}
12048
12049/// v7.12.4 — render a function arg list into the
12050/// canonical form the storage layer caches as
12051/// [`spg_storage::FunctionDef::args_repr`]. The catalogue uses
12052/// this string for both display + as a coarse signature key
12053/// for the (deferred) overload resolution v7.12.5+ adds.
12054fn render_function_args(args: &[spg_sql::ast::FunctionArg]) -> alloc::string::String {
12055    use core::fmt::Write;
12056    let mut out = alloc::string::String::from("(");
12057    for (i, a) in args.iter().enumerate() {
12058        if i > 0 {
12059            out.push_str(", ");
12060        }
12061        match a.mode {
12062            spg_sql::ast::FunctionArgMode::In => {}
12063            spg_sql::ast::FunctionArgMode::Out => out.push_str("OUT "),
12064            spg_sql::ast::FunctionArgMode::InOut => out.push_str("INOUT "),
12065        }
12066        if let Some(n) = &a.name {
12067            out.push_str(n);
12068            out.push(' ');
12069        }
12070        match &a.ty {
12071            spg_sql::ast::FunctionArgType::Typed(t) => {
12072                let _ = write!(out, "{t}");
12073            }
12074            spg_sql::ast::FunctionArgType::Raw(s) => out.push_str(s),
12075        }
12076    }
12077    out.push(')');
12078    out
12079}
12080
12081#[cfg(test)]
12082mod tests {
12083    use super::*;
12084    use alloc::vec;
12085
12086    fn unwrap_command_ok(r: &QueryResult) -> usize {
12087        match r {
12088            QueryResult::CommandOk { affected, .. } => *affected,
12089            QueryResult::Rows { .. } => panic!("expected CommandOk, got Rows"),
12090        }
12091    }
12092
12093    #[test]
12094    fn create_table_registers_schema() {
12095        let mut e = Engine::new();
12096        e.execute("CREATE TABLE foo (a INT NOT NULL, b TEXT)")
12097            .unwrap();
12098        assert_eq!(e.catalog().table_count(), 1);
12099        let t = e.catalog().get("foo").unwrap();
12100        assert_eq!(t.schema().columns.len(), 2);
12101        assert_eq!(t.schema().columns[0].ty, DataType::Int);
12102        assert!(!t.schema().columns[0].nullable);
12103        assert_eq!(t.schema().columns[1].ty, DataType::Text);
12104    }
12105
12106    #[test]
12107    fn create_table_vector_default_is_f32_encoded() {
12108        let mut e = Engine::new();
12109        e.execute("CREATE TABLE t (v VECTOR(8))").unwrap();
12110        let t = e.catalog().get("t").unwrap();
12111        assert_eq!(
12112            t.schema().columns[0].ty,
12113            DataType::Vector {
12114                dim: 8,
12115                encoding: VecEncoding::F32,
12116            },
12117        );
12118    }
12119
12120    #[test]
12121    fn create_table_vector_using_sq8_succeeds() {
12122        // v6.0.1 step 3: the step-1 fence in `column_def_to_schema`
12123        // is lifted. CREATE TABLE persists an SQ8 column type in
12124        // the catalog; INSERT (next test) quantises raw f32 input.
12125        let mut e = Engine::new();
12126        e.execute("CREATE TABLE t (v VECTOR(8) USING SQ8)").unwrap();
12127        let t = e.catalog().get("t").unwrap();
12128        assert_eq!(
12129            t.schema().columns[0].ty,
12130            DataType::Vector {
12131                dim: 8,
12132                encoding: VecEncoding::Sq8,
12133            },
12134        );
12135    }
12136
12137    #[test]
12138    fn insert_into_sq8_column_quantises_f32_payload() {
12139        // v6.0.1 step 3: INSERT-time `coerce_value` rewrites a raw
12140        // `Value::Vector(Vec<f32>)` literal into the column's
12141        // quantised representation. The row that lands in the
12142        // catalog must therefore hold a `Value::Sq8Vector`, not the
12143        // original f32 buffer — that's the bit that delivers the
12144        // 4× compression target.
12145        let mut e = Engine::new();
12146        e.execute("CREATE TABLE t (v VECTOR(4) USING SQ8)").unwrap();
12147        e.execute("INSERT INTO t VALUES ([0.0, 0.25, 0.5, 1.0])")
12148            .unwrap();
12149        let t = e.catalog().get("t").unwrap();
12150        assert_eq!(t.rows().len(), 1);
12151        match &t.rows()[0].values[0] {
12152            Value::Sq8Vector(q) => {
12153                assert_eq!(q.bytes.len(), 4);
12154                // min/max are derived from the payload: min=0.0, max=1.0.
12155                assert!((q.min - 0.0).abs() < 1e-6);
12156                assert!((q.max - 1.0).abs() < 1e-6);
12157            }
12158            other => panic!("expected Sq8Vector cell, got {other:?}"),
12159        }
12160    }
12161
12162    #[test]
12163    fn create_table_vector_using_half_succeeds_and_insert_converts_to_f16() {
12164        // v6.0.3: CREATE TABLE accepts USING HALF; INSERT path
12165        // converts the incoming `Value::Vector(Vec<f32>)` cell
12166        // into `Value::HalfVector(HalfVector)` via the new
12167        // `coerce_value` arm. The dequantised round-trip is
12168        // bit-exact for f16-representable values, so 0.0 / 0.25
12169        // / 0.5 / 1.0 hit their grid points exactly.
12170        let mut e = Engine::new();
12171        e.execute("CREATE TABLE t (v VECTOR(4) USING HALF)")
12172            .unwrap();
12173        e.execute("INSERT INTO t VALUES ([0.0, 0.25, 0.5, 1.0])")
12174            .unwrap();
12175        let t = e.catalog().get("t").unwrap();
12176        assert_eq!(t.rows().len(), 1);
12177        match &t.rows()[0].values[0] {
12178            Value::HalfVector(h) => {
12179                assert_eq!(h.dim(), 4);
12180                let back = h.to_f32_vec();
12181                let expected = alloc::vec![0.0_f32, 0.25, 0.5, 1.0];
12182                for (g, e) in back.iter().zip(expected.iter()) {
12183                    assert!(
12184                        (g - e).abs() < 1e-6,
12185                        "{g} vs {e} should be exact on f16 grid"
12186                    );
12187                }
12188            }
12189            other => panic!("expected HalfVector cell, got {other:?}"),
12190        }
12191    }
12192
12193    #[test]
12194    fn alter_index_rebuild_in_place_succeeds() {
12195        // v6.0.4: bare REBUILD (no encoding switch) walks every
12196        // row again to rebuild the NSW graph. Verifies the engine
12197        // dispatch + storage helper plumbing without changing any
12198        // cell encoding.
12199        let mut e = Engine::new();
12200        e.execute("CREATE TABLE t (id INT NOT NULL, v VECTOR(3) NOT NULL)")
12201            .unwrap();
12202        for i in 0..8_i32 {
12203            #[allow(clippy::cast_precision_loss)]
12204            let base = (i as f32) * 0.1;
12205            e.execute(&alloc::format!(
12206                "INSERT INTO t VALUES ({i}, [{base}, {b1}, {b2}])",
12207                b1 = base + 0.01,
12208                b2 = base + 0.02,
12209            ))
12210            .unwrap();
12211        }
12212        e.execute("CREATE INDEX t_idx ON t USING hnsw (v)").unwrap();
12213        e.execute("ALTER INDEX t_idx REBUILD").unwrap();
12214        // Schema encoding stays F32 (no encoding clause).
12215        assert_eq!(
12216            e.catalog().get("t").unwrap().schema().columns[1].ty,
12217            DataType::Vector {
12218                dim: 3,
12219                encoding: VecEncoding::F32,
12220            },
12221        );
12222    }
12223
12224    #[test]
12225    fn alter_index_rebuild_with_encoding_switches_cell_type() {
12226        // v6.0.4: REBUILD WITH (encoding = SQ8) recodes every
12227        // stored cell from F32 → SQ8 + rebuilds the graph atop the
12228        // new encoding. Post-rebuild, cells must be Sq8Vector and
12229        // the schema must report encoding = Sq8.
12230        let mut e = Engine::new();
12231        e.execute("CREATE TABLE t (id INT NOT NULL, v VECTOR(4) NOT NULL)")
12232            .unwrap();
12233        e.execute("INSERT INTO t VALUES (1, [0.0, 0.25, 0.5, 1.0])")
12234            .unwrap();
12235        e.execute("CREATE INDEX t_idx ON t USING hnsw (v)").unwrap();
12236        e.execute("ALTER INDEX t_idx REBUILD WITH (encoding = SQ8)")
12237            .unwrap();
12238        let t = e.catalog().get("t").unwrap();
12239        assert_eq!(
12240            t.schema().columns[1].ty,
12241            DataType::Vector {
12242                dim: 4,
12243                encoding: VecEncoding::Sq8,
12244            },
12245        );
12246        assert!(matches!(t.rows()[0].values[1], Value::Sq8Vector(_)));
12247    }
12248
12249    #[test]
12250    fn alter_index_rebuild_unknown_index_errors() {
12251        let mut e = Engine::new();
12252        let err = e.execute("ALTER INDEX nope REBUILD").unwrap_err();
12253        assert!(
12254            matches!(
12255                &err,
12256                EngineError::Storage(StorageError::IndexNotFound { name }) if name == "nope"
12257            ),
12258            "got: {err}"
12259        );
12260    }
12261
12262    #[test]
12263    fn alter_index_rebuild_on_btree_index_errors() {
12264        // REBUILD on a B-tree index has no semantic meaning in
12265        // v6.0.4 — rejected at the storage layer with `Unsupported`.
12266        let mut e = Engine::new();
12267        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
12268        e.execute("INSERT INTO t VALUES (1)").unwrap();
12269        e.execute("CREATE INDEX t_idx ON t (id)").unwrap();
12270        let err = e.execute("ALTER INDEX t_idx REBUILD").unwrap_err();
12271        assert!(
12272            matches!(&err, EngineError::Storage(StorageError::Unsupported(_))),
12273            "got: {err}"
12274        );
12275    }
12276
12277    #[test]
12278    fn prepared_insert_substitutes_placeholders() {
12279        // v6.1.1: prepare() parses once; execute_prepared() walks the
12280        // AST and replaces $1/$2 with the param Values BEFORE the
12281        // dispatch sees them. Same logical result as a simple-query
12282        // INSERT, but parse happens once per *statement*, not per
12283        // execution.
12284        let mut e = Engine::new();
12285        e.execute("CREATE TABLE t (id INT NOT NULL, name TEXT NOT NULL)")
12286            .unwrap();
12287        let stmt = e.prepare("INSERT INTO t VALUES ($1, $2)").unwrap();
12288        for (id, name) in [(1, "alice"), (2, "bob"), (3, "carol")] {
12289            e.execute_prepared(stmt.clone(), &[Value::Int(id), Value::Text(name.into())])
12290                .unwrap();
12291        }
12292        // Read back via simple-query SELECT.
12293        let rows_result = e.execute("SELECT id, name FROM t").unwrap();
12294        let QueryResult::Rows { rows, .. } = rows_result else {
12295            panic!("expected Rows")
12296        };
12297        assert_eq!(rows.len(), 3);
12298    }
12299
12300    #[test]
12301    fn prepared_select_with_placeholder_filters_rows() {
12302        let mut e = Engine::new();
12303        e.execute("CREATE TABLE t (id INT NOT NULL, v INT NOT NULL)")
12304            .unwrap();
12305        for i in 0..10_i32 {
12306            e.execute(&alloc::format!("INSERT INTO t VALUES ({i}, {})", i * 7))
12307                .unwrap();
12308        }
12309        let stmt = e.prepare("SELECT id FROM t WHERE v = $1").unwrap();
12310        let QueryResult::Rows { rows, .. } = e.execute_prepared(stmt, &[Value::Int(35)]).unwrap()
12311        else {
12312            panic!("expected Rows")
12313        };
12314        // v = 35 means i*7 = 35 → i = 5.
12315        assert_eq!(rows.len(), 1);
12316        assert_eq!(rows[0].values[0], Value::Int(5));
12317    }
12318
12319    #[test]
12320    fn prepared_too_few_params_errors() {
12321        let mut e = Engine::new();
12322        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
12323        let stmt = e.prepare("INSERT INTO t VALUES ($1)").unwrap();
12324        let err = e.execute_prepared(stmt, &[]).unwrap_err();
12325        assert!(
12326            matches!(
12327                &err,
12328                EngineError::Eval(EvalError::PlaceholderOutOfRange { n: 1, bound: 0 })
12329            ),
12330            "got: {err}"
12331        );
12332    }
12333
12334    #[test]
12335    fn insert_into_half_column_dim_mismatch_errors() {
12336        let mut e = Engine::new();
12337        e.execute("CREATE TABLE t (v VECTOR(4) USING HALF)")
12338            .unwrap();
12339        let err = e.execute("INSERT INTO t VALUES ([1.0, 2.0])").unwrap_err();
12340        assert!(matches!(
12341            &err,
12342            EngineError::Storage(StorageError::TypeMismatch { .. })
12343        ));
12344    }
12345
12346    #[test]
12347    fn insert_into_sq8_column_dim_mismatch_errors() {
12348        // Dim mismatch falls through the `coerce_value` Vector→Sq8
12349        // arm's guard and surfaces as `TypeMismatch` — the same
12350        // error the F32 path produces today, so client error
12351        // handling stays uniform across encodings.
12352        let mut e = Engine::new();
12353        e.execute("CREATE TABLE t (v VECTOR(4) USING SQ8)").unwrap();
12354        let err = e.execute("INSERT INTO t VALUES ([1.0, 2.0])").unwrap_err();
12355        assert!(
12356            matches!(
12357                &err,
12358                EngineError::Storage(StorageError::TypeMismatch { .. })
12359            ),
12360            "got: {err}",
12361        );
12362    }
12363
12364    #[test]
12365    fn create_table_duplicate_errors() {
12366        let mut e = Engine::new();
12367        e.execute("CREATE TABLE foo (a INT)").unwrap();
12368        let err = e.execute("CREATE TABLE foo (a INT)").unwrap_err();
12369        assert!(matches!(
12370            err,
12371            EngineError::Storage(StorageError::DuplicateTable { ref name }) if name == "foo"
12372        ));
12373    }
12374
12375    #[test]
12376    fn insert_into_unknown_table_errors() {
12377        let mut e = Engine::new();
12378        let err = e.execute("INSERT INTO ghost VALUES (1)").unwrap_err();
12379        assert!(matches!(
12380            err,
12381            EngineError::Storage(StorageError::TableNotFound { ref name }) if name == "ghost"
12382        ));
12383    }
12384
12385    #[test]
12386    fn insert_happy_path_reports_one_affected() {
12387        let mut e = Engine::new();
12388        e.execute("CREATE TABLE foo (a INT NOT NULL)").unwrap();
12389        let r = e.execute("INSERT INTO foo VALUES (42)").unwrap();
12390        assert_eq!(unwrap_command_ok(&r), 1);
12391        assert_eq!(e.catalog().get("foo").unwrap().row_count(), 1);
12392    }
12393
12394    #[test]
12395    fn insert_arity_mismatch_propagates() {
12396        let mut e = Engine::new();
12397        e.execute("CREATE TABLE foo (a INT, b TEXT)").unwrap();
12398        let err = e.execute("INSERT INTO foo VALUES (1)").unwrap_err();
12399        assert!(matches!(
12400            err,
12401            EngineError::Storage(StorageError::ArityMismatch { .. })
12402        ));
12403    }
12404
12405    #[test]
12406    fn insert_negative_integer_via_unary_minus() {
12407        let mut e = Engine::new();
12408        e.execute("CREATE TABLE foo (a INT NOT NULL)").unwrap();
12409        e.execute("INSERT INTO foo VALUES (-7)").unwrap();
12410        let rows = e.catalog().get("foo").unwrap().rows();
12411        assert_eq!(rows[0].values[0], Value::Int(-7));
12412    }
12413
12414    #[test]
12415    fn insert_non_literal_expr_unsupported() {
12416        let mut e = Engine::new();
12417        e.execute("CREATE TABLE foo (a INT NOT NULL)").unwrap();
12418        let err = e.execute("INSERT INTO foo VALUES (1 + 2)").unwrap_err();
12419        assert!(matches!(err, EngineError::Unsupported(_)));
12420    }
12421
12422    #[test]
12423    fn select_star_returns_all_rows_in_insertion_order() {
12424        let mut e = Engine::new();
12425        e.execute("CREATE TABLE foo (a INT NOT NULL, b TEXT NOT NULL)")
12426            .unwrap();
12427        e.execute("INSERT INTO foo VALUES (1, 'one')").unwrap();
12428        e.execute("INSERT INTO foo VALUES (2, 'two')").unwrap();
12429        e.execute("INSERT INTO foo VALUES (3, 'three')").unwrap();
12430
12431        let r = e.execute("SELECT * FROM foo").unwrap();
12432        let QueryResult::Rows { columns, rows } = r else {
12433            panic!("expected Rows")
12434        };
12435        assert_eq!(columns.len(), 2);
12436        assert_eq!(columns[0].name, "a");
12437        assert_eq!(rows.len(), 3);
12438        assert_eq!(
12439            rows[1].values,
12440            vec![Value::Int(2), Value::Text("two".into())]
12441        );
12442    }
12443
12444    #[test]
12445    fn select_star_on_empty_table_returns_zero_rows() {
12446        let mut e = Engine::new();
12447        e.execute("CREATE TABLE foo (a INT)").unwrap();
12448        let r = e.execute("SELECT * FROM foo").unwrap();
12449        match r {
12450            QueryResult::Rows { rows, .. } => assert!(rows.is_empty()),
12451            QueryResult::CommandOk { .. } => panic!("expected Rows"),
12452        }
12453    }
12454
12455    // --- v0.4: WHERE + projection ------------------------------------------
12456
12457    fn make_three_row_users(e: &mut Engine) {
12458        e.execute("CREATE TABLE users (id INT NOT NULL, name TEXT NOT NULL, score INT)")
12459            .unwrap();
12460        e.execute("INSERT INTO users VALUES (1, 'alice', 90)")
12461            .unwrap();
12462        e.execute("INSERT INTO users VALUES (2, 'bob', NULL)")
12463            .unwrap();
12464        e.execute("INSERT INTO users VALUES (3, 'cara', 70)")
12465            .unwrap();
12466    }
12467
12468    fn unwrap_rows(r: QueryResult) -> (Vec<ColumnSchema>, Vec<Row>) {
12469        match r {
12470            QueryResult::Rows { columns, rows } => (columns, rows),
12471            QueryResult::CommandOk { .. } => panic!("expected Rows"),
12472        }
12473    }
12474
12475    #[test]
12476    fn where_filter_passes_only_true_rows() {
12477        let mut e = Engine::new();
12478        make_three_row_users(&mut e);
12479        let r = e.execute("SELECT * FROM users WHERE id > 1").unwrap();
12480        let (_, rows) = unwrap_rows(r);
12481        assert_eq!(rows.len(), 2);
12482        assert_eq!(rows[0].values[0], Value::Int(2));
12483        assert_eq!(rows[1].values[0], Value::Int(3));
12484    }
12485
12486    #[test]
12487    fn where_with_null_result_filters_out_row() {
12488        let mut e = Engine::new();
12489        make_three_row_users(&mut e);
12490        // score is NULL for bob → score > 80 is NULL → row excluded
12491        let r = e.execute("SELECT * FROM users WHERE score > 80").unwrap();
12492        let (_, rows) = unwrap_rows(r);
12493        assert_eq!(rows.len(), 1);
12494        assert_eq!(rows[0].values[1], Value::Text("alice".into()));
12495    }
12496
12497    #[test]
12498    fn projection_named_columns() {
12499        let mut e = Engine::new();
12500        make_three_row_users(&mut e);
12501        let r = e.execute("SELECT name, score FROM users").unwrap();
12502        let (cols, rows) = unwrap_rows(r);
12503        assert_eq!(cols.len(), 2);
12504        assert_eq!(cols[0].name, "name");
12505        assert_eq!(cols[1].name, "score");
12506        assert_eq!(rows.len(), 3);
12507        assert_eq!(
12508            rows[0].values,
12509            vec![Value::Text("alice".into()), Value::Int(90)]
12510        );
12511    }
12512
12513    #[test]
12514    fn projection_with_column_alias() {
12515        let mut e = Engine::new();
12516        make_three_row_users(&mut e);
12517        let r = e
12518            .execute("SELECT name AS who FROM users WHERE id = 1")
12519            .unwrap();
12520        let (cols, rows) = unwrap_rows(r);
12521        assert_eq!(cols[0].name, "who");
12522        assert_eq!(rows.len(), 1);
12523        assert_eq!(rows[0].values[0], Value::Text("alice".into()));
12524    }
12525
12526    #[test]
12527    fn qualified_column_with_table_alias_resolves() {
12528        let mut e = Engine::new();
12529        make_three_row_users(&mut e);
12530        let r = e
12531            .execute("SELECT u.id, u.name FROM users AS u WHERE u.id < 3")
12532            .unwrap();
12533        let (cols, rows) = unwrap_rows(r);
12534        assert_eq!(cols.len(), 2);
12535        assert_eq!(rows.len(), 2);
12536    }
12537
12538    #[test]
12539    fn qualified_column_with_wrong_alias_errors() {
12540        let mut e = Engine::new();
12541        make_three_row_users(&mut e);
12542        let err = e.execute("SELECT x.id FROM users AS u").unwrap_err();
12543        assert!(matches!(
12544            err,
12545            EngineError::Eval(EvalError::UnknownQualifier { ref qualifier }) if qualifier == "x"
12546        ));
12547    }
12548
12549    #[test]
12550    fn select_unknown_column_errors_in_projection() {
12551        let mut e = Engine::new();
12552        make_three_row_users(&mut e);
12553        let err = e.execute("SELECT ghost FROM users").unwrap_err();
12554        assert!(matches!(
12555            err,
12556            EngineError::Eval(EvalError::ColumnNotFound { ref name }) if name == "ghost"
12557        ));
12558    }
12559
12560    #[test]
12561    fn where_unknown_column_errors() {
12562        let mut e = Engine::new();
12563        make_three_row_users(&mut e);
12564        let err = e
12565            .execute("SELECT * FROM users WHERE ghost = 1")
12566            .unwrap_err();
12567        assert!(matches!(
12568            err,
12569            EngineError::Eval(EvalError::ColumnNotFound { .. })
12570        ));
12571    }
12572
12573    #[test]
12574    fn expression_projection_evaluates_and_renders() {
12575        // Compound expressions in the SELECT list are evaluated per row;
12576        // the output column is typed TEXT, name defaults to the expression.
12577        let mut e = Engine::new();
12578        e.execute("CREATE TABLE t (a INT NOT NULL)").unwrap();
12579        e.execute("INSERT INTO t VALUES (3)").unwrap();
12580        let (_, rows) = unwrap_rows(e.execute("SELECT 1 + 2 FROM t").unwrap());
12581        assert_eq!(rows.len(), 1);
12582        // The expression evaluates to integer 3; rendered as the cell value
12583        // (storage::Value::Int(3) since arithmetic kept ints).
12584        assert_eq!(rows[0].values[0], Value::Int(3));
12585    }
12586
12587    #[test]
12588    fn select_unknown_table_errors() {
12589        let mut e = Engine::new();
12590        let err = e.execute("SELECT * FROM ghost").unwrap_err();
12591        assert!(matches!(
12592            err,
12593            EngineError::Storage(StorageError::TableNotFound { .. })
12594        ));
12595    }
12596
12597    #[test]
12598    fn invalid_sql_returns_parse_error() {
12599        // v4.4: UPDATE is now real SQL, so use a true syntactic
12600        // garbage payload for the parse-error path.
12601        let mut e = Engine::new();
12602        let err = e.execute("THIS_IS_NOT_A_KEYWORD foo bar baz").unwrap_err();
12603        assert!(matches!(err, EngineError::Parse(_)));
12604    }
12605
12606    // --- v0.8 CREATE INDEX + index seek ------------------------------------
12607
12608    #[test]
12609    fn create_index_registers_on_table() {
12610        let mut e = Engine::new();
12611        make_three_row_users(&mut e);
12612        e.execute("CREATE INDEX by_name ON users (name)").unwrap();
12613        let t = e.catalog().get("users").unwrap();
12614        assert_eq!(t.indices().len(), 1);
12615        assert_eq!(t.indices()[0].name, "by_name");
12616    }
12617
12618    #[test]
12619    fn create_index_on_unknown_table_errors() {
12620        let mut e = Engine::new();
12621        let err = e.execute("CREATE INDEX i ON ghost (a)").unwrap_err();
12622        assert!(matches!(
12623            err,
12624            EngineError::Storage(StorageError::TableNotFound { .. })
12625        ));
12626    }
12627
12628    #[test]
12629    fn create_index_on_unknown_column_errors() {
12630        let mut e = Engine::new();
12631        make_three_row_users(&mut e);
12632        let err = e.execute("CREATE INDEX i ON users (ghost)").unwrap_err();
12633        assert!(matches!(
12634            err,
12635            EngineError::Storage(StorageError::ColumnNotFound { .. })
12636        ));
12637    }
12638
12639    #[test]
12640    fn select_eq_uses_index_returns_same_rows_as_scan() {
12641        // Build two engines: one with an index, one without. Same query →
12642        // same row set (index is a planner optimisation, not a semantic
12643        // change).
12644        let mut without = Engine::new();
12645        make_three_row_users(&mut without);
12646        let mut with = Engine::new();
12647        make_three_row_users(&mut with);
12648        with.execute("CREATE INDEX by_id ON users (id)").unwrap();
12649
12650        let q = "SELECT * FROM users WHERE id = 2";
12651        let (_, no_idx_rows) = unwrap_rows(without.execute(q).unwrap());
12652        let (_, idx_rows) = unwrap_rows(with.execute(q).unwrap());
12653        assert_eq!(no_idx_rows, idx_rows);
12654        assert_eq!(idx_rows.len(), 1);
12655    }
12656
12657    #[test]
12658    fn select_eq_with_no_matching_index_value_returns_empty() {
12659        let mut e = Engine::new();
12660        make_three_row_users(&mut e);
12661        e.execute("CREATE INDEX by_id ON users (id)").unwrap();
12662        let (_, rows) = unwrap_rows(e.execute("SELECT * FROM users WHERE id = 999").unwrap());
12663        assert_eq!(rows.len(), 0);
12664    }
12665
12666    // --- v0.9 transactions -------------------------------------------------
12667
12668    #[test]
12669    fn begin_sets_in_transaction_flag() {
12670        let mut e = Engine::new();
12671        assert!(!e.in_transaction());
12672        e.execute("BEGIN").unwrap();
12673        assert!(e.in_transaction());
12674    }
12675
12676    #[test]
12677    fn double_begin_errors() {
12678        let mut e = Engine::new();
12679        e.execute("BEGIN").unwrap();
12680        let err = e.execute("BEGIN").unwrap_err();
12681        assert_eq!(err, EngineError::TransactionAlreadyOpen);
12682    }
12683
12684    #[test]
12685    fn commit_without_begin_errors() {
12686        let mut e = Engine::new();
12687        let err = e.execute("COMMIT").unwrap_err();
12688        assert_eq!(err, EngineError::NoActiveTransaction);
12689    }
12690
12691    #[test]
12692    fn rollback_without_begin_errors() {
12693        let mut e = Engine::new();
12694        let err = e.execute("ROLLBACK").unwrap_err();
12695        assert_eq!(err, EngineError::NoActiveTransaction);
12696    }
12697
12698    #[test]
12699    fn commit_applies_shadow_to_committed_catalog() {
12700        let mut e = Engine::new();
12701        e.execute("CREATE TABLE t (v INT NOT NULL)").unwrap();
12702        e.execute("BEGIN").unwrap();
12703        e.execute("INSERT INTO t VALUES (1)").unwrap();
12704        e.execute("INSERT INTO t VALUES (2)").unwrap();
12705        e.execute("COMMIT").unwrap();
12706        assert!(!e.in_transaction());
12707        assert_eq!(e.catalog().get("t").unwrap().row_count(), 2);
12708    }
12709
12710    #[test]
12711    fn rollback_discards_shadow() {
12712        let mut e = Engine::new();
12713        e.execute("CREATE TABLE t (v INT NOT NULL)").unwrap();
12714        e.execute("BEGIN").unwrap();
12715        e.execute("INSERT INTO t VALUES (1)").unwrap();
12716        e.execute("INSERT INTO t VALUES (2)").unwrap();
12717        e.execute("ROLLBACK").unwrap();
12718        assert!(!e.in_transaction());
12719        assert_eq!(e.catalog().get("t").unwrap().row_count(), 0);
12720    }
12721
12722    #[test]
12723    fn select_during_tx_sees_uncommitted_writes_own_session() {
12724        // The shadow catalog is read by SELECTs while a TX is open — the
12725        // session can see its own pending writes.
12726        let mut e = Engine::new();
12727        e.execute("CREATE TABLE t (v INT NOT NULL)").unwrap();
12728        e.execute("BEGIN").unwrap();
12729        e.execute("INSERT INTO t VALUES (42)").unwrap();
12730        let (_, rows) = unwrap_rows(e.execute("SELECT * FROM t").unwrap());
12731        assert_eq!(rows.len(), 1);
12732        assert_eq!(rows[0].values[0], Value::Int(42));
12733    }
12734
12735    #[test]
12736    fn snapshot_with_no_users_is_bare_catalog_format() {
12737        let mut e = Engine::new();
12738        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
12739        let bytes = e.snapshot();
12740        assert_eq!(
12741            &bytes[..8],
12742            b"SPGDB001",
12743            "must be the bare v3.x catalog magic"
12744        );
12745        let e2 = Engine::restore_envelope(&bytes).unwrap();
12746        assert!(e2.users().is_empty());
12747        assert_eq!(e2.catalog().table_count(), 1);
12748    }
12749
12750    #[test]
12751    fn snapshot_with_users_round_trips_both_via_envelope() {
12752        let mut e = Engine::new();
12753        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
12754        e.create_user("alice", "pw1", Role::Admin, [9; 16]).unwrap();
12755        e.create_user("bob", "pw2", Role::ReadOnly, [5; 16])
12756            .unwrap();
12757        let bytes = e.snapshot();
12758        assert_eq!(&bytes[..8], b"SPGENV01", "must be the v4.1 envelope magic");
12759        let e2 = Engine::restore_envelope(&bytes).unwrap();
12760        assert_eq!(e2.users().len(), 2);
12761        assert_eq!(e2.verify_user("alice", "pw1"), Some(Role::Admin));
12762        assert_eq!(e2.verify_user("bob", "pw2"), Some(Role::ReadOnly));
12763        assert_eq!(e2.verify_user("alice", "wrong"), None);
12764        assert_eq!(e2.catalog().table_count(), 1);
12765    }
12766
12767    #[test]
12768    fn ddl_inside_tx_also_rolled_back() {
12769        let mut e = Engine::new();
12770        e.execute("BEGIN").unwrap();
12771        e.execute("CREATE TABLE t (v INT)").unwrap();
12772        // Visible inside the TX.
12773        e.execute("SELECT * FROM t").unwrap();
12774        e.execute("ROLLBACK").unwrap();
12775        // Gone after rollback.
12776        let err = e.execute("SELECT * FROM t").unwrap_err();
12777        assert!(matches!(
12778            err,
12779            EngineError::Storage(StorageError::TableNotFound { .. })
12780        ));
12781    }
12782
12783    // ── v6.1.2: CREATE / DROP PUBLICATION (engine-side) ──────
12784
12785    #[test]
12786    fn create_publication_lands_in_catalog() {
12787        let mut e = Engine::new();
12788        assert!(e.publications().is_empty());
12789        e.execute("CREATE PUBLICATION pub_a").unwrap();
12790        assert_eq!(e.publications().len(), 1);
12791        assert!(e.publications().contains("pub_a"));
12792    }
12793
12794    #[test]
12795    fn create_publication_duplicate_errors() {
12796        let mut e = Engine::new();
12797        e.execute("CREATE PUBLICATION pub_a").unwrap();
12798        let err = e.execute("CREATE PUBLICATION pub_a").unwrap_err();
12799        assert!(
12800            alloc::format!("{err:?}").contains("DuplicateName"),
12801            "got {err:?}"
12802        );
12803    }
12804
12805    #[test]
12806    fn drop_publication_silent_when_absent() {
12807        let mut e = Engine::new();
12808        // PG-compatible: DROP a publication that doesn't exist
12809        // succeeds (no-op) but reports zero affected.
12810        let r = e.execute("DROP PUBLICATION nope").unwrap();
12811        match r {
12812            QueryResult::CommandOk { affected, .. } => assert_eq!(affected, 0),
12813            other => panic!("expected CommandOk, got {other:?}"),
12814        }
12815    }
12816
12817    #[test]
12818    fn drop_publication_present_reports_one_affected() {
12819        let mut e = Engine::new();
12820        e.execute("CREATE PUBLICATION pub_a").unwrap();
12821        let r = e.execute("DROP PUBLICATION pub_a").unwrap();
12822        match r {
12823            QueryResult::CommandOk {
12824                affected,
12825                modified_catalog,
12826            } => {
12827                assert_eq!(affected, 1);
12828                assert!(modified_catalog);
12829            }
12830            other => panic!("expected CommandOk, got {other:?}"),
12831        }
12832        assert!(e.publications().is_empty());
12833    }
12834
12835    #[test]
12836    fn publications_persist_across_snapshot_restore() {
12837        // The persist-across-restart ship-gate at the engine layer —
12838        // snapshot → restore_envelope round trip must preserve the
12839        // publication catalog. The spg-server e2e covers the
12840        // process-restart variant.
12841        let mut e = Engine::new();
12842        e.execute("CREATE PUBLICATION pub_a").unwrap();
12843        e.execute("CREATE PUBLICATION pub_b FOR ALL TABLES")
12844            .unwrap();
12845        let snap = e.snapshot();
12846        let e2 = Engine::restore_envelope(&snap).unwrap();
12847        assert_eq!(e2.publications().len(), 2);
12848        assert!(e2.publications().contains("pub_a"));
12849        assert!(e2.publications().contains("pub_b"));
12850    }
12851
12852    #[test]
12853    fn create_publication_allowed_inside_transaction() {
12854        // v6.1.4 dropped the v6.1.2 in-TX guard — PG allows
12855        // CREATE PUBLICATION inside a TX and the auto-commit
12856        // wrap path needs the same allowance.
12857        let mut e = Engine::new();
12858        e.execute("BEGIN").unwrap();
12859        e.execute("CREATE PUBLICATION pub_a").unwrap();
12860        e.execute("COMMIT").unwrap();
12861        assert!(e.publications().contains("pub_a"));
12862    }
12863
12864    // ── v6.1.3: SHOW PUBLICATIONS + FOR-list variants ───────
12865
12866    #[test]
12867    fn create_publication_for_table_list_lands_with_scope() {
12868        let mut e = Engine::new();
12869        e.execute("CREATE TABLE t1 (id INT NOT NULL)").unwrap();
12870        e.execute("CREATE TABLE t2 (id INT NOT NULL)").unwrap();
12871        e.execute("CREATE PUBLICATION pub_a FOR TABLE t1, t2")
12872            .unwrap();
12873        let scope = e.publications().get("pub_a").cloned();
12874        let Some(spg_sql::ast::PublicationScope::ForTables(ts)) = scope else {
12875            panic!("expected ForTables scope, got {scope:?}")
12876        };
12877        assert_eq!(ts, alloc::vec!["t1".to_string(), "t2".to_string()]);
12878    }
12879
12880    #[test]
12881    fn create_publication_all_tables_except_lands_with_scope() {
12882        let mut e = Engine::new();
12883        e.execute("CREATE PUBLICATION pub_a FOR ALL TABLES EXCEPT t3")
12884            .unwrap();
12885        let scope = e.publications().get("pub_a").cloned();
12886        let Some(spg_sql::ast::PublicationScope::AllTablesExcept(ts)) = scope else {
12887            panic!("expected AllTablesExcept scope, got {scope:?}")
12888        };
12889        assert_eq!(ts, alloc::vec!["t3".to_string()]);
12890    }
12891
12892    #[test]
12893    fn show_publications_empty_returns_zero_rows() {
12894        let e = Engine::new();
12895        let r = e.execute_readonly("SHOW PUBLICATIONS").unwrap();
12896        let QueryResult::Rows { rows, columns } = r else {
12897            panic!()
12898        };
12899        assert!(rows.is_empty());
12900        assert_eq!(columns.len(), 3);
12901        assert_eq!(columns[0].name, "name");
12902        assert_eq!(columns[1].name, "scope");
12903        assert_eq!(columns[2].name, "table_count");
12904    }
12905
12906    #[test]
12907    fn show_publications_returns_one_row_per_publication_ordered_by_name() {
12908        let mut e = Engine::new();
12909        e.execute("CREATE PUBLICATION z_pub").unwrap();
12910        e.execute("CREATE PUBLICATION a_pub FOR TABLE t1, t2")
12911            .unwrap();
12912        e.execute("CREATE PUBLICATION m_pub FOR ALL TABLES EXCEPT bad")
12913            .unwrap();
12914        let r = e.execute_readonly("SHOW PUBLICATIONS").unwrap();
12915        let QueryResult::Rows { rows, .. } = r else {
12916            panic!()
12917        };
12918        assert_eq!(rows.len(), 3);
12919        // Alphabetical order: a_pub, m_pub, z_pub.
12920        let names: Vec<&str> = rows
12921            .iter()
12922            .map(|r| {
12923                if let Value::Text(s) = &r.values[0] {
12924                    s.as_str()
12925                } else {
12926                    panic!()
12927                }
12928            })
12929            .collect();
12930        assert_eq!(names, alloc::vec!["a_pub", "m_pub", "z_pub"]);
12931        // Row 0 — a_pub scope summary + table_count = 2.
12932        match &rows[0].values[1] {
12933            Value::Text(s) => assert_eq!(s, "FOR TABLE t1, t2"),
12934            other => panic!("expected Text, got {other:?}"),
12935        }
12936        assert_eq!(rows[0].values[2], Value::Int(2));
12937        // Row 1 — m_pub.
12938        match &rows[1].values[1] {
12939            Value::Text(s) => assert_eq!(s, "FOR ALL TABLES EXCEPT bad"),
12940            other => panic!("expected Text, got {other:?}"),
12941        }
12942        assert_eq!(rows[1].values[2], Value::Int(1));
12943        // Row 2 — z_pub (AllTables → NULL count).
12944        match &rows[2].values[1] {
12945            Value::Text(s) => assert_eq!(s, "FOR ALL TABLES"),
12946            other => panic!("expected Text, got {other:?}"),
12947        }
12948        assert_eq!(rows[2].values[2], Value::Null);
12949    }
12950
12951    #[test]
12952    fn for_list_scopes_persist_across_snapshot() {
12953        // The v6.1.2 envelope-v3 round-trip exercised AllTables;
12954        // v6.1.3 needs the scope-1 / scope-2 tags to survive too.
12955        let mut e = Engine::new();
12956        e.execute("CREATE PUBLICATION p1 FOR TABLE t1, t2").unwrap();
12957        e.execute("CREATE PUBLICATION p2 FOR ALL TABLES EXCEPT bad, worse")
12958            .unwrap();
12959        let snap = e.snapshot();
12960        let e2 = Engine::restore_envelope(&snap).unwrap();
12961        assert_eq!(e2.publications().len(), 2);
12962        let p1 = e2.publications().get("p1").cloned();
12963        let Some(spg_sql::ast::PublicationScope::ForTables(ts)) = p1 else {
12964            panic!("p1 scope lost: {p1:?}")
12965        };
12966        assert_eq!(ts, alloc::vec!["t1".to_string(), "t2".to_string()]);
12967        let p2 = e2.publications().get("p2").cloned();
12968        let Some(spg_sql::ast::PublicationScope::AllTablesExcept(ts)) = p2 else {
12969            panic!("p2 scope lost: {p2:?}")
12970        };
12971        assert_eq!(ts, alloc::vec!["bad".to_string(), "worse".to_string()]);
12972    }
12973
12974    // ── v6.1.4: CREATE / DROP SUBSCRIPTION + SHOW + envelope v4 ─
12975
12976    #[test]
12977    fn create_subscription_lands_in_catalog_with_defaults() {
12978        let mut e = Engine::new();
12979        e.execute(
12980            "CREATE SUBSCRIPTION sub_a CONNECTION 'host=127.0.0.1 port=20002' PUBLICATION pub_a",
12981        )
12982        .unwrap();
12983        let s = e.subscriptions().get("sub_a").cloned().expect("present");
12984        assert_eq!(s.conn_str, "host=127.0.0.1 port=20002");
12985        assert_eq!(s.publications, alloc::vec!["pub_a".to_string()]);
12986        assert!(s.enabled);
12987        assert_eq!(s.last_received_pos, 0);
12988    }
12989
12990    #[test]
12991    fn create_subscription_duplicate_name_errors() {
12992        let mut e = Engine::new();
12993        e.execute("CREATE SUBSCRIPTION s CONNECTION 'host=x' PUBLICATION p")
12994            .unwrap();
12995        let err = e
12996            .execute("CREATE SUBSCRIPTION s CONNECTION 'host=y' PUBLICATION p")
12997            .unwrap_err();
12998        assert!(
12999            alloc::format!("{err:?}").contains("DuplicateName"),
13000            "got {err:?}"
13001        );
13002    }
13003
13004    #[test]
13005    fn drop_subscription_silent_when_absent() {
13006        let mut e = Engine::new();
13007        let r = e.execute("DROP SUBSCRIPTION never").unwrap();
13008        match r {
13009            QueryResult::CommandOk { affected, .. } => assert_eq!(affected, 0),
13010            other => panic!("expected CommandOk, got {other:?}"),
13011        }
13012    }
13013
13014    #[test]
13015    fn subscription_advance_updates_last_pos_monotone() {
13016        let mut e = Engine::new();
13017        e.execute("CREATE SUBSCRIPTION s CONNECTION 'h=x' PUBLICATION p")
13018            .unwrap();
13019        assert!(e.subscription_advance("s", 100));
13020        assert_eq!(e.subscriptions().get("s").unwrap().last_received_pos, 100);
13021        assert!(e.subscription_advance("s", 50)); // stale → ignored
13022        assert_eq!(e.subscriptions().get("s").unwrap().last_received_pos, 100);
13023        assert!(e.subscription_advance("s", 200));
13024        assert_eq!(e.subscriptions().get("s").unwrap().last_received_pos, 200);
13025        assert!(!e.subscription_advance("missing", 1));
13026    }
13027
13028    #[test]
13029    fn show_subscriptions_returns_rows_ordered_by_name() {
13030        let mut e = Engine::new();
13031        e.execute("CREATE SUBSCRIPTION z_sub CONNECTION 'h=x' PUBLICATION p1, p2")
13032            .unwrap();
13033        e.execute("CREATE SUBSCRIPTION a_sub CONNECTION 'h=y' PUBLICATION p3")
13034            .unwrap();
13035        let r = e.execute_readonly("SHOW SUBSCRIPTIONS").unwrap();
13036        let QueryResult::Rows { rows, columns } = r else {
13037            panic!()
13038        };
13039        assert_eq!(rows.len(), 2);
13040        assert_eq!(columns.len(), 5);
13041        assert_eq!(columns[0].name, "name");
13042        assert_eq!(columns[4].name, "last_received_pos");
13043        // Alphabetical: a_sub, z_sub.
13044        let names: Vec<&str> = rows
13045            .iter()
13046            .map(|r| {
13047                if let Value::Text(s) = &r.values[0] {
13048                    s.as_str()
13049                } else {
13050                    panic!()
13051                }
13052            })
13053            .collect();
13054        assert_eq!(names, alloc::vec!["a_sub", "z_sub"]);
13055        // Row 0: a_sub
13056        assert_eq!(rows[0].values[1], Value::Text("h=y".to_string()));
13057        assert_eq!(rows[0].values[2], Value::Text("p3".to_string()));
13058        assert_eq!(rows[0].values[3], Value::Bool(true));
13059        assert_eq!(rows[0].values[4], Value::BigInt(0));
13060        // Row 1: z_sub — publications join with ", "
13061        assert_eq!(rows[1].values[2], Value::Text("p1, p2".to_string()));
13062    }
13063
13064    #[test]
13065    fn subscriptions_persist_across_snapshot_envelope_v4() {
13066        let mut e = Engine::new();
13067        e.execute("CREATE SUBSCRIPTION s1 CONNECTION 'h=A' PUBLICATION p1, p2")
13068            .unwrap();
13069        e.execute("CREATE SUBSCRIPTION s2 CONNECTION 'h=B' PUBLICATION p3")
13070            .unwrap();
13071        e.subscription_advance("s2", 42);
13072        let snap = e.snapshot();
13073        let e2 = Engine::restore_envelope(&snap).unwrap();
13074        assert_eq!(e2.subscriptions().len(), 2);
13075        let s1 = e2.subscriptions().get("s1").unwrap();
13076        assert_eq!(s1.conn_str, "h=A");
13077        assert_eq!(
13078            s1.publications,
13079            alloc::vec!["p1".to_string(), "p2".to_string()]
13080        );
13081        assert_eq!(s1.last_received_pos, 0);
13082        let s2 = e2.subscriptions().get("s2").unwrap();
13083        assert_eq!(s2.last_received_pos, 42);
13084    }
13085
13086    #[test]
13087    fn v3_envelope_loads_with_empty_subscriptions() {
13088        // v3 snapshot (publications-only). Forge it by hand so we
13089        // verify v6.1.4 readers don't panic — they must surface
13090        // empty subscriptions and a populated publication table.
13091        let mut e = Engine::new();
13092        e.execute("CREATE PUBLICATION pub_legacy").unwrap();
13093        let catalog = e.catalog.serialize();
13094        let users = crate::users::serialize_users(&e.users);
13095        let pubs = e.publications.serialize();
13096        let mut buf = Vec::new();
13097        buf.extend_from_slice(b"SPGENV01");
13098        buf.push(3u8); // v3
13099        buf.extend_from_slice(&u32::try_from(catalog.len()).unwrap().to_le_bytes());
13100        buf.extend_from_slice(&catalog);
13101        buf.extend_from_slice(&u32::try_from(users.len()).unwrap().to_le_bytes());
13102        buf.extend_from_slice(&users);
13103        buf.extend_from_slice(&u32::try_from(pubs.len()).unwrap().to_le_bytes());
13104        buf.extend_from_slice(&pubs);
13105        let crc = spg_crypto::crc32::crc32(&buf);
13106        buf.extend_from_slice(&crc.to_le_bytes());
13107
13108        let e2 = Engine::restore_envelope(&buf).expect("v3 envelope restores under v4 reader");
13109        assert!(e2.subscriptions().is_empty());
13110        assert!(e2.publications().contains("pub_legacy"));
13111    }
13112
13113    #[test]
13114    fn create_subscription_allowed_inside_transaction() {
13115        let mut e = Engine::new();
13116        e.execute("BEGIN").unwrap();
13117        e.execute("CREATE SUBSCRIPTION s CONNECTION 'h=x' PUBLICATION p")
13118            .unwrap();
13119        e.execute("COMMIT").unwrap();
13120        assert!(e.subscriptions().contains("s"));
13121    }
13122
13123    // ── v6.2.0: ANALYZE + spg_statistic + envelope v5 ──────────
13124    #[test]
13125    fn analyze_populates_histogram_bounds() {
13126        let mut e = Engine::new();
13127        e.execute("CREATE TABLE t (id INT NOT NULL, name TEXT)")
13128            .unwrap();
13129        for i in 0..50 {
13130            e.execute(&alloc::format!("INSERT INTO t VALUES ({i}, 'name{i}')"))
13131                .unwrap();
13132        }
13133        e.execute("ANALYZE t").unwrap();
13134        let stats = e.statistics();
13135        let id_stats = stats.get("t", "id").unwrap();
13136        assert!(id_stats.histogram_bounds.len() >= 2);
13137        assert_eq!(id_stats.histogram_bounds.first().unwrap(), "0");
13138        assert_eq!(id_stats.histogram_bounds.last().unwrap(), "49");
13139        assert!((id_stats.null_frac - 0.0).abs() < 1e-6);
13140        assert_eq!(id_stats.n_distinct, 50);
13141    }
13142
13143    #[test]
13144    fn reanalyze_overwrites_prior_stats() {
13145        let mut e = Engine::new();
13146        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
13147        for i in 0..10 {
13148            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
13149                .unwrap();
13150        }
13151        e.execute("ANALYZE t").unwrap();
13152        let n1 = e.statistics().get("t", "id").unwrap().n_distinct;
13153        assert_eq!(n1, 10);
13154        for i in 10..30 {
13155            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
13156                .unwrap();
13157        }
13158        e.execute("ANALYZE t").unwrap();
13159        let n2 = e.statistics().get("t", "id").unwrap().n_distinct;
13160        assert_eq!(n2, 30);
13161    }
13162
13163    #[test]
13164    fn analyze_unknown_table_errors() {
13165        let mut e = Engine::new();
13166        let err = e.execute("ANALYZE nonexistent").unwrap_err();
13167        assert!(matches!(
13168            err,
13169            EngineError::Storage(StorageError::TableNotFound { .. })
13170        ));
13171    }
13172
13173    #[test]
13174    fn bare_analyze_covers_all_user_tables() {
13175        let mut e = Engine::new();
13176        e.execute("CREATE TABLE t1 (id INT NOT NULL)").unwrap();
13177        e.execute("CREATE TABLE t2 (name TEXT NOT NULL)").unwrap();
13178        e.execute("INSERT INTO t1 VALUES (1)").unwrap();
13179        e.execute("INSERT INTO t2 VALUES ('alice')").unwrap();
13180        let r = e.execute("ANALYZE").unwrap();
13181        match r {
13182            QueryResult::CommandOk {
13183                affected,
13184                modified_catalog,
13185            } => {
13186                assert_eq!(affected, 2);
13187                assert!(modified_catalog);
13188            }
13189            other => panic!("expected CommandOk, got {other:?}"),
13190        }
13191        assert!(e.statistics().get("t1", "id").is_some());
13192        assert!(e.statistics().get("t2", "name").is_some());
13193    }
13194
13195    #[test]
13196    fn select_from_spg_statistic_returns_rows_per_column() {
13197        let mut e = Engine::new();
13198        e.execute("CREATE TABLE t (id INT NOT NULL, label TEXT)")
13199            .unwrap();
13200        e.execute("INSERT INTO t VALUES (1, 'a')").unwrap();
13201        e.execute("INSERT INTO t VALUES (2, 'b')").unwrap();
13202        e.execute("ANALYZE t").unwrap();
13203        let r = e.execute_readonly("SELECT * FROM spg_statistic").unwrap();
13204        let QueryResult::Rows { rows, columns } = r else {
13205            panic!()
13206        };
13207        // v6.7.0 — spg_statistic gained a `cold_row_count` column.
13208        assert_eq!(columns.len(), 6);
13209        assert_eq!(columns[0].name, "table_name");
13210        assert_eq!(columns[4].name, "histogram_bounds");
13211        assert_eq!(columns[5].name, "cold_row_count");
13212        assert_eq!(rows.len(), 2, "one row per column of t");
13213        // Sorted by (table_name, column_name).
13214        match (&rows[0].values[0], &rows[0].values[1]) {
13215            (Value::Text(t), Value::Text(c)) => {
13216                assert_eq!(t, "t");
13217                // BTreeMap orders (table, column); columns "id" < "label".
13218                assert_eq!(c, "id");
13219            }
13220            _ => panic!(),
13221        }
13222    }
13223
13224    #[test]
13225    fn analyze_skips_vector_columns() {
13226        // Vector columns have their own stats shape (HNSW graph);
13227        // ANALYZE leaves them out of spg_statistic.
13228        let mut e = Engine::new();
13229        e.execute("CREATE TABLE t (id INT NOT NULL, v VECTOR(3) NOT NULL)")
13230            .unwrap();
13231        e.execute("INSERT INTO t VALUES (1, [1, 2, 3])").unwrap();
13232        e.execute("ANALYZE t").unwrap();
13233        assert!(e.statistics().get("t", "id").is_some());
13234        assert!(e.statistics().get("t", "v").is_none());
13235    }
13236
13237    #[test]
13238    fn statistics_persist_across_envelope_v5_round_trip() {
13239        let mut e = Engine::new();
13240        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
13241        for i in 0..20 {
13242            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
13243                .unwrap();
13244        }
13245        e.execute("ANALYZE").unwrap();
13246        let snap = e.snapshot();
13247        let e2 = Engine::restore_envelope(&snap).unwrap();
13248        let s = e2.statistics().get("t", "id").unwrap();
13249        assert_eq!(s.n_distinct, 20);
13250    }
13251
13252    // ── v6.2.1 auto-analyze threshold ───────────────────────────
13253
13254    #[test]
13255    fn auto_analyze_threshold_fires_after_10pct_of_min_rows_on_small_table() {
13256        // For a table with 0 rows then 10 inserts → modified=10,
13257        // row_count=10. Threshold = 0.1 × max(10, 100) = 10. So
13258        // after the 10th INSERT the threshold is met.
13259        let mut e = Engine::new();
13260        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
13261        for i in 0..9 {
13262            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
13263                .unwrap();
13264        }
13265        assert!(e.tables_needing_analyze().is_empty(), "9 < threshold");
13266        e.execute("INSERT INTO t VALUES (9)").unwrap();
13267        let needs = e.tables_needing_analyze();
13268        assert_eq!(needs, alloc::vec!["t".to_string()]);
13269    }
13270
13271    #[test]
13272    fn auto_analyze_threshold_uses_10pct_of_row_count_for_large_tables() {
13273        // After ANALYZE on 1000 rows, threshold = 0.1 × row_count.
13274        // Each new INSERT bumps both modified and row_count, so to
13275        // trigger from N=1000 we need modifications ≥ 0.1 × (1000+M),
13276        // i.e. M ≥ 112. The test inserts 50 (no fire), then 150
13277        // more (200 total mods, row_count=1200, threshold=120 → fire).
13278        let mut e = Engine::new();
13279        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
13280        for i in 0..1000 {
13281            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
13282                .unwrap();
13283        }
13284        e.execute("ANALYZE t").unwrap();
13285        assert!(e.tables_needing_analyze().is_empty(), "fresh ANALYZE");
13286        for i in 1000..1050 {
13287            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
13288                .unwrap();
13289        }
13290        assert!(
13291            e.tables_needing_analyze().is_empty(),
13292            "50 inserts < threshold of ~105"
13293        );
13294        for i in 1050..1200 {
13295            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
13296                .unwrap();
13297        }
13298        assert_eq!(
13299            e.tables_needing_analyze(),
13300            alloc::vec!["t".to_string()],
13301            "200 inserts > 0.1 × 1200 threshold"
13302        );
13303    }
13304
13305    #[test]
13306    fn auto_analyze_threshold_resets_after_analyze() {
13307        let mut e = Engine::new();
13308        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
13309        for i in 0..200 {
13310            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
13311                .unwrap();
13312        }
13313        assert!(!e.tables_needing_analyze().is_empty());
13314        e.execute("ANALYZE").unwrap();
13315        assert!(
13316            e.tables_needing_analyze().is_empty(),
13317            "ANALYZE must reset the counter"
13318        );
13319    }
13320
13321    #[test]
13322    fn auto_analyze_threshold_tracks_updates_and_deletes() {
13323        let mut e = Engine::new();
13324        e.execute("CREATE TABLE t (id INT NOT NULL, label TEXT)")
13325            .unwrap();
13326        for i in 0..50 {
13327            e.execute(&alloc::format!("INSERT INTO t VALUES ({i}, 'x')"))
13328                .unwrap();
13329        }
13330        e.execute("ANALYZE t").unwrap();
13331        // UPDATE 20 rows + DELETE 5 → modified=25. Threshold = 0.1
13332        // × max(50, 100) = 10. So 25 >= 10 → trigger.
13333        e.execute("UPDATE t SET label = 'y' WHERE id < 20").unwrap();
13334        e.execute("DELETE FROM t WHERE id >= 45").unwrap();
13335        assert_eq!(e.tables_needing_analyze(), alloc::vec!["t".to_string()]);
13336    }
13337
13338    #[test]
13339    fn v4_envelope_loads_with_empty_statistics() {
13340        // Forge a v4 envelope by hand: catalog + users + pubs +
13341        // subs trailer, no statistics. A v6.2.0 reader must accept
13342        // it and surface an empty Statistics.
13343        let mut e = Engine::new();
13344        e.create_user("alice", "secret", crate::users::Role::ReadOnly, [0u8; 16])
13345            .unwrap();
13346        let catalog = e.catalog.serialize();
13347        let users = crate::users::serialize_users(&e.users);
13348        let pubs = e.publications.serialize();
13349        let subs = e.subscriptions.serialize();
13350        let mut buf = Vec::new();
13351        buf.extend_from_slice(b"SPGENV01");
13352        buf.push(4u8);
13353        buf.extend_from_slice(&u32::try_from(catalog.len()).unwrap().to_le_bytes());
13354        buf.extend_from_slice(&catalog);
13355        buf.extend_from_slice(&u32::try_from(users.len()).unwrap().to_le_bytes());
13356        buf.extend_from_slice(&users);
13357        buf.extend_from_slice(&u32::try_from(pubs.len()).unwrap().to_le_bytes());
13358        buf.extend_from_slice(&pubs);
13359        buf.extend_from_slice(&u32::try_from(subs.len()).unwrap().to_le_bytes());
13360        buf.extend_from_slice(&subs);
13361        let crc = spg_crypto::crc32::crc32(&buf);
13362        buf.extend_from_slice(&crc.to_le_bytes());
13363        let e2 = Engine::restore_envelope(&buf).expect("v4 envelope restores");
13364        assert!(e2.statistics().is_empty());
13365    }
13366
13367    #[test]
13368    fn v1_v2_envelope_loads_with_empty_publications() {
13369        // A snapshot taken before v6.1.2 (no publication trailer,
13370        // envelope v2) must still deserialise — and the resulting
13371        // engine must report zero publications. Use the engine's own
13372        // round-trip with no publications: that emits v3 but with an
13373        // empty pubs block. Then forge a v2 envelope by hand to lock
13374        // the back-compat path.
13375        let mut e = Engine::new();
13376        // Force users to be non-empty so the snapshot takes the
13377        // envelope path rather than the bare-catalog fallback.
13378        e.create_user("alice", "secret", crate::users::Role::ReadOnly, [0u8; 16])
13379            .unwrap();
13380
13381        // Forge an envelope v2: same shape as v3 but no pubs trailer.
13382        let catalog = e.catalog.serialize();
13383        let users = crate::users::serialize_users(&e.users);
13384        let mut buf = Vec::new();
13385        buf.extend_from_slice(b"SPGENV01");
13386        buf.push(2u8); // v2
13387        buf.extend_from_slice(&u32::try_from(catalog.len()).unwrap().to_le_bytes());
13388        buf.extend_from_slice(&catalog);
13389        buf.extend_from_slice(&u32::try_from(users.len()).unwrap().to_le_bytes());
13390        buf.extend_from_slice(&users);
13391        let crc = spg_crypto::crc32::crc32(&buf);
13392        buf.extend_from_slice(&crc.to_le_bytes());
13393
13394        let e2 = Engine::restore_envelope(&buf).expect("v2 envelope restores");
13395        assert!(e2.publications().is_empty());
13396    }
13397}