Skip to main content

spg_engine/
lib.rs

1//! SPG execution engine — v0.3 wires the SQL front-end to the in-memory
2//! storage layer. Implements `CREATE TABLE`, single-row `INSERT VALUES`, and
3//! `SELECT * FROM <table>` (no WHERE yet — that lands in v0.4 alongside
4//! expression evaluation against rows).
5#![no_std]
6
7extern crate alloc;
8
9pub mod aggregate;
10pub mod describe;
11pub mod eval;
12pub mod fts;
13pub mod json;
14pub mod memoize;
15pub mod plan_cache;
16pub mod publications;
17pub mod query_stats;
18pub mod reorder;
19pub mod selectivity;
20pub mod statistics;
21pub mod subscriptions;
22pub mod triggers;
23pub mod users;
24
25pub use crate::users::{Role, ScramSecrets, UserError, UserStore};
26
27use alloc::borrow::Cow;
28use alloc::boxed::Box;
29use alloc::collections::BTreeMap;
30use alloc::string::{String, ToString};
31use alloc::vec::Vec;
32use core::fmt;
33
34use spg_sql::ast::{
35    BinOp, ColumnDef, ColumnName, ColumnTypeName, CreateIndexStatement, CreatePublicationStatement,
36    CreateSubscriptionStatement, CreateTableStatement, CreateUserStatement, Expr, FrameBound,
37    FrameKind, FromClause, IndexMethod, InsertStatement, JoinKind, Literal, OrderBy, SelectItem,
38    SelectStatement, Statement, TableRef, UnOp, UnionKind, VecEncoding as SqlVecEncoding,
39    WindowFrame,
40};
41// v7.16.0 — re-export the parsed-statement AST so downstream
42// crates (spg-embedded → spg-sqlx) don't need a direct dep on
43// spg-sql for the prepare/bind handle.
44pub use spg_sql::ast::Statement as ParsedStatement;
45use spg_sql::parser::{self, ParseError};
46use spg_storage::{
47    Catalog, ColumnSchema, CompactReport, DataType, IndexKey, IndexKind, Row, StorageError, Table,
48    TableSchema, Value, VecEncoding,
49};
50
51use crate::eval::{EvalContext, EvalError};
52
/// Result of executing one statement.
///
/// Marked `#[non_exhaustive]`: external `match` expressions must include
/// a `_` arm so additional variants are not a breaking change.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub enum QueryResult {
    /// DDL or DML succeeded.
    ///
    /// `affected` is the row count for `INSERT` and 0 elsewhere.
    /// `modified_catalog` tells the server whether this statement
    /// caused the *committed* catalog to change — it's the signal to
    /// snapshot/audit. False for `BEGIN`/`ROLLBACK`, false for writeful
    /// statements executed inside a transaction (those only touch the
    /// shadow), and true for `COMMIT` and for writes outside a TX.
    CommandOk {
        /// Number of rows the statement affected.
        affected: usize,
        /// Whether the committed catalog changed (see the variant docs).
        modified_catalog: bool,
    },
    /// `SELECT` returned a (possibly empty) row set.
    Rows {
        /// Column schemas describing the result set.
        columns: Vec<ColumnSchema>,
        /// The result rows; may be empty.
        rows: Vec<Row>,
    },
}
75
/// All errors the engine can return.
///
/// Marked `#[non_exhaustive]` from v7.5.0 onward: external `match`
/// must include a `_` arm so new variants in subsequent v7.x releases
/// are not breaking changes.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub enum EngineError {
    /// Wraps a [`ParseError`] from the `spg_sql` parser.
    Parse(ParseError),
    /// Wraps a [`StorageError`] from the `spg_storage` layer.
    Storage(StorageError),
    /// Wraps an [`EvalError`] from the `eval` module.
    Eval(EvalError),
    /// Front-end accepted a construct that the v0.x executor doesn't support.
    Unsupported(String),
    /// `BEGIN` while another transaction is already open.
    TransactionAlreadyOpen,
    /// `COMMIT` / `ROLLBACK` with no active transaction.
    NoActiveTransaction,
    /// v4.0 sentinel: `execute_readonly` got a statement that
    /// mutates engine state (INSERT / CREATE / BEGIN / COMMIT / …).
    /// The caller should retake the write lock and dispatch through
    /// `execute(&mut self)` instead.
    WriteRequired,
    /// v4.2: a SELECT would have returned more rows than the
    /// configured `max_query_rows` cap. Carries the cap.
    RowLimitExceeded(usize),
    /// v4.5: cooperative cancellation — the host (server's
    /// per-query watchdog) set the cancel flag while a long-running
    /// SELECT / UPDATE / DELETE was scanning rows. The partial work
    /// is discarded; the caller should surface this as a timeout
    /// to the client.
    Cancelled,
}
108
109impl fmt::Display for EngineError {
110    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
111        match self {
112            Self::Parse(e) => write!(f, "parse: {e}"),
113            Self::Storage(e) => write!(f, "storage: {e}"),
114            Self::Eval(e) => write!(f, "eval: {e}"),
115            Self::Unsupported(s) => write!(f, "unsupported: {s}"),
116            Self::TransactionAlreadyOpen => f.write_str("a transaction is already open"),
117            Self::NoActiveTransaction => f.write_str("no active transaction"),
118            Self::WriteRequired => {
119                f.write_str("statement requires a write lock (use execute, not execute_readonly)")
120            }
121            Self::RowLimitExceeded(n) => {
122                write!(f, "query exceeded max_query_rows={n}")
123            }
124            Self::Cancelled => f.write_str("query cancelled (timeout or client request)"),
125        }
126    }
127}
128
129impl From<ParseError> for EngineError {
130    fn from(e: ParseError) -> Self {
131        Self::Parse(e)
132    }
133}
134impl From<StorageError> for EngineError {
135    fn from(e: StorageError) -> Self {
136        Self::Storage(e)
137    }
138}
139impl From<EvalError> for EngineError {
140    fn from(e: EvalError) -> Self {
141        Self::Eval(e)
142    }
143}
144
// NOTE: the paragraph below describes `Engine` (declared further down in
// this file), not `ClockFn`. It is kept as a plain comment so rustdoc does
// not attach it to the type alias that follows.
//
// The execution engine. Holds the catalog and (later) other server-scope
// state. `Engine::new()` is intentionally cheap so callers can construct one
// per database, per test.

/// Function pointer that returns "now" as microseconds since Unix
/// epoch. The engine is `no_std`, so it can't reach for `std::time`
/// itself — callers (`spg-server`, the sqllogictest runner) inject a
/// concrete implementation. `None` means `NOW()` / `CURRENT_*` raise
/// `Unsupported`.
pub type ClockFn = fn() -> i64;
154
155/// Function pointer that produces 16 cryptographically random bytes.
156/// Like `ClockFn`, the engine is `no_std` and can't reach for /dev/urandom
157/// itself — host (`spg-server`) injects an OS-backed source. `None`
158/// means SQL-driven `CREATE USER` falls back to a deterministic salt
159/// derived from the username (acceptable in tests; the server always
160/// installs a real RNG so production paths never see this).
161pub type SaltFn = fn() -> [u8; 16];
162
163/// v4.5 cooperative cancellation token. A long-running SELECT /
164/// UPDATE / DELETE checks `is_cancelled` at row-loop checkpoints
165/// and bails with `EngineError::Cancelled`. The host
166/// (`spg-server`) creates an `AtomicBool` per query, spawns a
167/// watchdog thread that sets it after `SPG_QUERY_TIMEOUT_MS`,
168/// and passes it via `execute_with_cancel` / `execute_readonly_with_cancel`.
169///
170/// `CancelToken::none()` is a no-op — used by the legacy `execute`
171/// and `execute_readonly` entry points so existing callers don't
172/// change.
173#[derive(Debug, Clone, Copy)]
174pub struct CancelToken<'a> {
175    flag: Option<&'a core::sync::atomic::AtomicBool>,
176}
177
178impl<'a> CancelToken<'a> {
179    #[must_use]
180    pub const fn none() -> Self {
181        Self { flag: None }
182    }
183
184    #[must_use]
185    pub const fn from_flag(f: &'a core::sync::atomic::AtomicBool) -> Self {
186        Self { flag: Some(f) }
187    }
188
189    #[must_use]
190    pub fn is_cancelled(self) -> bool {
191        self.flag
192            .is_some_and(|f| f.load(core::sync::atomic::Ordering::Relaxed))
193    }
194
195    /// Returns `Err(Cancelled)` if the token has been tripped.
196    /// Used at row-loop checkpoints to bail cooperatively without
197    /// scattering raw `is_cancelled` checks across the executor.
198    #[inline]
199    pub fn check(self) -> Result<(), EngineError> {
200        if self.is_cancelled() {
201            Err(EngineError::Cancelled)
202        } else {
203            Ok(())
204        }
205    }
206}
207
// ---- snapshot envelope (v4.1, extended with CRC32 in v4.37,        ----
// ----   publications in v6.1.2 [v3], subscriptions in v6.1.4 [v4],  ----
// ----   statistics in v6.2.0 [v5])                                  ----
210//
211// Wraps a catalog blob + a user blob behind a small header so the
212// server can persist both atomically without inventing a new file.
213// Bare catalog blobs (v3.x) still load via `restore_envelope` since
214// the magic check fails fast and the function falls back to
215// `Catalog::deserialize`.
216//
217// Layout — v1 (v4.1, no CRC):
218//   [8 bytes magic "SPGENV01"]
219//   [u8 version = 1]
220//   [u32 catalog_len][catalog bytes]
221//   [u32 users_len][users bytes]
222//
223// Layout — v2 (v4.37, CRC32 of body):
224//   [8 bytes magic "SPGENV01"]
225//   [u8 version = 2]
226//   [u32 catalog_len][catalog bytes]
227//   [u32 users_len][users bytes]
228//   [u32 crc32]                      ← CRC32 of every byte before it.
229//
230// Layout — v3 (v6.1.2, publications trailer):
231//   [8 bytes magic "SPGENV01"]
232//   [u8 version = 3]
233//   [u32 catalog_len][catalog bytes]
234//   [u32 users_len][users bytes]
235//   [u32 pubs_len][publications bytes]
236//   [u32 crc32]
237//
238// Layout — v4 (v6.1.4, subscriptions trailer):
239//   [8 bytes magic "SPGENV01"]
240//   [u8 version = 4]
241//   [u32 catalog_len][catalog bytes]
242//   [u32 users_len][users bytes]
243//   [u32 pubs_len][publications bytes]
244//   [u32 subs_len][subscriptions bytes]
245//   [u32 crc32]
246//
247// Layout — v5 (v6.2.0, statistics trailer):
248//   [8 bytes magic "SPGENV01"]
249//   [u8 version = 5]
250//   [u32 catalog_len][catalog bytes]
251//   [u32 users_len][users bytes]
252//   [u32 pubs_len][publications bytes]
253//   [u32 subs_len][subscriptions bytes]
254//   [u32 stats_len][statistics bytes]      ← NEW
255//   [u32 crc32]
256//
257// Writers emit v5 from v6.2.0 on. Readers accept all of {v1, v2,
258// v3, v4, v5}: v1/v2 load with empty publications / subscriptions /
259// statistics; v3 loads with empty subscriptions + statistics; v4
260// loads with empty statistics; v5 deserialises all three. Older
261// SPG versions reading a v5 envelope fall through the version
262// match to `EnvelopeParse::Bare` — pre-v6.2.0 binaries cannot
263// open v6.2.0+ snapshots (matches the v6.1.2 / v6.1.4 breaks).
264
/// 8-byte magic that opens every snapshot envelope.
const ENVELOPE_MAGIC: &[u8; 8] = b"SPGENV01";
/// Envelope v1: catalog + users sections, no CRC.
const ENVELOPE_VERSION_V1: u8 = 1;
/// Envelope v2: v1 plus a trailing CRC32 of the body.
const ENVELOPE_VERSION_V2: u8 = 2;
/// Envelope v3: v2 plus a publications section.
const ENVELOPE_VERSION_V3: u8 = 3;
/// Envelope v4: v3 plus a subscriptions section.
const ENVELOPE_VERSION_V4: u8 = 4;
/// Envelope v5: v4 plus a statistics section. This is the version
/// `build_envelope` writes.
const ENVELOPE_VERSION_V5: u8 = 5;
271
272fn build_envelope(catalog: &[u8], users: &[u8], pubs: &[u8], subs: &[u8], stats: &[u8]) -> Vec<u8> {
273    let mut out = Vec::with_capacity(
274        8 + 1
275            + 4
276            + catalog.len()
277            + 4
278            + users.len()
279            + 4
280            + pubs.len()
281            + 4
282            + subs.len()
283            + 4
284            + stats.len()
285            + 4,
286    );
287    out.extend_from_slice(ENVELOPE_MAGIC);
288    out.push(ENVELOPE_VERSION_V5);
289    out.extend_from_slice(
290        &u32::try_from(catalog.len())
291            .expect("≤ 4G catalog")
292            .to_le_bytes(),
293    );
294    out.extend_from_slice(catalog);
295    out.extend_from_slice(
296        &u32::try_from(users.len())
297            .expect("≤ 4G users")
298            .to_le_bytes(),
299    );
300    out.extend_from_slice(users);
301    out.extend_from_slice(
302        &u32::try_from(pubs.len())
303            .expect("≤ 4G publications")
304            .to_le_bytes(),
305    );
306    out.extend_from_slice(pubs);
307    out.extend_from_slice(
308        &u32::try_from(subs.len())
309            .expect("≤ 4G subscriptions")
310            .to_le_bytes(),
311    );
312    out.extend_from_slice(subs);
313    out.extend_from_slice(
314        &u32::try_from(stats.len())
315            .expect("≤ 4G statistics")
316            .to_le_bytes(),
317    );
318    out.extend_from_slice(stats);
319    let crc = spg_crypto::crc32::crc32(&out);
320    out.extend_from_slice(&crc.to_le_bytes());
321    out
322}
323
/// Outcome of envelope parsing.
///
/// * `Bare` — the buffer is not a well-formed envelope (wrong magic,
///   unknown version, or inconsistent section lengths). Per the layout
///   notes above, this is what keeps v3.x bare-catalog blobs readable.
/// * `Pair` — the sections split out of a v1–v5 envelope. `catalog` and
///   `users` are always present; each trailer is `None` when the
///   envelope version predates it (`publications` from v3,
///   `subscriptions` from v4, `statistics` from v5).
/// * `CrcMismatch` — a v2+ envelope whose trailing CRC32 does not match
///   the CRC32 computed over the body.
enum EnvelopeParse<'a> {
    Bare,
    Pair {
        catalog: &'a [u8],
        users: &'a [u8],
        publications: Option<&'a [u8]>,
        subscriptions: Option<&'a [u8]>,
        statistics: Option<&'a [u8]>,
    },
    CrcMismatch {
        // CRC32 stored in the envelope's trailing four bytes.
        expected: u32,
        // CRC32 recomputed over every byte before the trailer.
        computed: u32,
    },
}
344
345/// Returns `EnvelopeParse::Pair` for a valid v1 / v2 / v3 envelope,
346/// `Bare` for a buffer that doesn't look like an envelope (v3.x
347/// bare catalog fallback), and `CrcMismatch` for a v2/v3 envelope
348/// whose trailing CRC32 doesn't match the body.
349fn split_envelope(buf: &[u8]) -> EnvelopeParse<'_> {
350    if buf.len() < 8 + 1 + 4 || &buf[..8] != ENVELOPE_MAGIC {
351        return EnvelopeParse::Bare;
352    }
353    let version = buf[8];
354    if !matches!(
355        version,
356        ENVELOPE_VERSION_V1
357            | ENVELOPE_VERSION_V2
358            | ENVELOPE_VERSION_V3
359            | ENVELOPE_VERSION_V4
360            | ENVELOPE_VERSION_V5
361    ) {
362        return EnvelopeParse::Bare;
363    }
364    let mut p = 9usize;
365    let Some(cat_len_bytes) = buf.get(p..p + 4) else {
366        return EnvelopeParse::Bare;
367    };
368    let Ok(cat_len_arr) = cat_len_bytes.try_into() else {
369        return EnvelopeParse::Bare;
370    };
371    let cat_len = u32::from_le_bytes(cat_len_arr) as usize;
372    p += 4;
373    if p + cat_len + 4 > buf.len() {
374        return EnvelopeParse::Bare;
375    }
376    let catalog = &buf[p..p + cat_len];
377    p += cat_len;
378    let Some(user_len_bytes) = buf.get(p..p + 4) else {
379        return EnvelopeParse::Bare;
380    };
381    let Ok(user_len_arr) = user_len_bytes.try_into() else {
382        return EnvelopeParse::Bare;
383    };
384    let user_len = u32::from_le_bytes(user_len_arr) as usize;
385    p += 4;
386    if p + user_len > buf.len() {
387        return EnvelopeParse::Bare;
388    }
389    let users = &buf[p..p + user_len];
390    p += user_len;
391    let publications = if matches!(
392        version,
393        ENVELOPE_VERSION_V3 | ENVELOPE_VERSION_V4 | ENVELOPE_VERSION_V5
394    ) {
395        // [u32 pubs_len][publications bytes]
396        let Some(pubs_len_bytes) = buf.get(p..p + 4) else {
397            return EnvelopeParse::Bare;
398        };
399        let Ok(pubs_len_arr) = pubs_len_bytes.try_into() else {
400            return EnvelopeParse::Bare;
401        };
402        let pubs_len = u32::from_le_bytes(pubs_len_arr) as usize;
403        p += 4;
404        if p + pubs_len > buf.len() {
405            return EnvelopeParse::Bare;
406        }
407        let pubs_slice = &buf[p..p + pubs_len];
408        p += pubs_len;
409        Some(pubs_slice)
410    } else {
411        None
412    };
413    let subscriptions = if matches!(version, ENVELOPE_VERSION_V4 | ENVELOPE_VERSION_V5) {
414        // [u32 subs_len][subscriptions bytes]
415        let Some(subs_len_bytes) = buf.get(p..p + 4) else {
416            return EnvelopeParse::Bare;
417        };
418        let Ok(subs_len_arr) = subs_len_bytes.try_into() else {
419            return EnvelopeParse::Bare;
420        };
421        let subs_len = u32::from_le_bytes(subs_len_arr) as usize;
422        p += 4;
423        if p + subs_len > buf.len() {
424            return EnvelopeParse::Bare;
425        }
426        let subs_slice = &buf[p..p + subs_len];
427        p += subs_len;
428        Some(subs_slice)
429    } else {
430        None
431    };
432    let statistics = if version == ENVELOPE_VERSION_V5 {
433        // [u32 stats_len][statistics bytes]
434        let Some(stats_len_bytes) = buf.get(p..p + 4) else {
435            return EnvelopeParse::Bare;
436        };
437        let Ok(stats_len_arr) = stats_len_bytes.try_into() else {
438            return EnvelopeParse::Bare;
439        };
440        let stats_len = u32::from_le_bytes(stats_len_arr) as usize;
441        p += 4;
442        if p + stats_len > buf.len() {
443            return EnvelopeParse::Bare;
444        }
445        let stats_slice = &buf[p..p + stats_len];
446        p += stats_len;
447        Some(stats_slice)
448    } else {
449        None
450    };
451    if matches!(
452        version,
453        ENVELOPE_VERSION_V2 | ENVELOPE_VERSION_V3 | ENVELOPE_VERSION_V4 | ENVELOPE_VERSION_V5
454    ) {
455        if p + 4 != buf.len() {
456            return EnvelopeParse::Bare;
457        }
458        let Ok(crc_arr) = buf[p..p + 4].try_into() else {
459            return EnvelopeParse::Bare;
460        };
461        let expected = u32::from_le_bytes(crc_arr);
462        let computed = spg_crypto::crc32::crc32(&buf[..p]);
463        if expected != computed {
464            return EnvelopeParse::CrcMismatch { expected, computed };
465        }
466    } else if p != buf.len() {
467        // v1: must end exactly at the users section.
468        return EnvelopeParse::Bare;
469    }
470    EnvelopeParse::Pair {
471        catalog,
472        users,
473        publications,
474        subscriptions,
475        statistics,
476    }
477}
478
479/// v4.41.1 opaque transaction handle. Returned by `Engine::alloc_tx_id`,
480/// threaded through `Engine::execute_in` so dispatch can identify which
481/// in-flight TX a statement belongs to. `IMPLICIT_TX` is the reserved
482/// slot every legacy caller — engine self-tests, spg-cli, spg-embedded,
483/// startup replay — implicitly uses through the unchanged
484/// `Engine::execute(sql)` API. v4.41.1 keeps at most one active slot at
485/// runtime (dispatch holds `engine.write()` across the wrap, same as
486/// v4.34); the map shape is here to let v4.42 turn on N in-flight
487/// implicit TXs without reshuffling the engine internals.
488#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
489pub struct TxId(pub u64);
490
491/// Reserved slot used by `Engine::execute(sql)` — the legacy single-
492/// global-shadow path. New `alloc_tx_id` handles start at 1.
493pub const IMPLICIT_TX: TxId = TxId(0);
494
495/// v6.7.3 — default segment-size threshold used by `COMPACT COLD
496/// SEGMENTS` when no explicit target is supplied. Segments whose
497/// `OwnedSegment::bytes().len()` is **strictly** less than this
498/// value are eligible to merge. spg-server reads
499/// `SPG_COMPACTION_TARGET_SEGMENT_BYTES` to override.
500pub const COMPACTION_TARGET_DEFAULT_BYTES: u64 = 4 * 1024 * 1024;
501
502/// Per-slot transaction state. Held inside `tx_catalogs[tx_id]` for the
503/// lifetime of a BEGIN..COMMIT (or BEGIN..ROLLBACK) window. Drops when
504/// the TX commits (its `catalog` is moved over `Engine.catalog`) or
505/// rolls back (slot removed, catalog discarded).
506#[derive(Debug, Default, Clone)]
507struct TxState {
508    /// The TX's shadow copy of the catalog. Started as a clone of
509    /// `Engine.catalog` at BEGIN time; writes flow into it; COMMIT
510    /// installs it over `Engine.catalog`. `Catalog::clone()` is O(1)
511    /// since v4.40 (`PersistentVec` rows + `PersistentBTreeMap` indices).
512    catalog: Catalog,
513    /// Per-TX savepoint stack. Each entry pairs the savepoint name with
514    /// a clone of `catalog` at the moment `SAVEPOINT <name>` fired.
515    /// `ROLLBACK TO <name>` restores from the entry and pops everything
516    /// after it; `RELEASE <name>` discards the entry and everything
517    /// after; COMMIT/ROLLBACK clears the whole stack.
518    savepoints: Vec<(String, Catalog)>,
519}
520
521/// v7.11.0 — frozen read-only view of the engine's committed state.
522/// Constructed via [`Engine::clone_snapshot`]. Holds clones of the
523/// catalog, statistics, clock function, and row-cap config — the
524/// four fields the `execute_readonly` path actually reads. Cheap to
525/// `Clone` (each clone shares the underlying `PersistentVec` row
526/// storage; only the trie root pointers copy). Send + Sync so a
527/// snapshot can be moved across `tokio::task::spawn_blocking`
528/// boundaries without coordination.
529///
530/// The contract: a snapshot reflects the engine's state at the
531/// moment `clone_snapshot()` returned. Subsequent writes to the
532/// engine are NOT visible. Callers who need fresher data take a
533/// new snapshot.
534#[derive(Debug, Clone)]
535pub struct CatalogSnapshot {
536    catalog: Catalog,
537    statistics: statistics::Statistics,
538    clock: Option<ClockFn>,
539    max_query_rows: Option<usize>,
540}
541
542#[derive(Debug, Default)]
543pub struct Engine {
544    /// Committed catalog — what survives `Engine::snapshot()` and what
545    /// outside-TX `SELECT`s read.
546    catalog: Catalog,
547    /// Active TX slots, keyed by `TxId`. Empty when no TX is in flight.
548    /// v4.41.1 runtime invariant: at most one entry (single-writer
549    /// model unchanged). v4.42 will let dispatch hold multiple entries
550    /// concurrently for group commit + engine MVCC.
551    tx_catalogs: BTreeMap<TxId, TxState>,
552    /// Which slot the next exec_* call should mutate. Set by
553    /// `execute_in(sql, tx_id)` at the entry point; legacy `execute(sql)`
554    /// sets it to `IMPLICIT_TX`. None when no TX is in flight (read /
555    /// write goes straight against `catalog`).
556    current_tx: Option<TxId>,
557    /// Monotonic counter for `alloc_tx_id`. Starts at 1 — slot 0 is
558    /// reserved for `IMPLICIT_TX`.
559    next_tx_id: u64,
560    /// Optional wall clock used to satisfy `NOW()` / `CURRENT_TIMESTAMP`
561    /// / `CURRENT_DATE`. Set by the host environment.
562    clock: Option<ClockFn>,
563    /// v4.1 cryptographic RNG for per-user password salt. Set by the
564    /// host. `None` means SQL-driven `CREATE USER` uses a
565    /// deterministic fallback — see `SaltFn`.
566    salt_fn: Option<SaltFn>,
567    /// v4.2 per-query row cap. `None` = unlimited. When set, a
568    /// SELECT that materialises more than `n` rows returns
569    /// `EngineError::RowLimitExceeded`. Enforced before the result
570    /// is shaped into wire frames so a runaway scan can't blow the
571    /// server's heap.
572    max_query_rows: Option<usize>,
573    /// v4.1 RBAC user table. Empty means "no RBAC configured yet" —
574    /// the server decides what that means at the auth boundary
575    /// (open mode vs legacy single-password mode). User CRUD goes
576    /// through `create_user`/`drop_user`/`verify_user`; persistence
577    /// rides the snapshot envelope alongside the catalog.
578    users: UserStore,
579    /// v6.1.2 logical-replication publication catalog. Empty until
580    /// `CREATE PUBLICATION` runs. Persistence rides the v3 envelope
581    /// trailer (see `build_envelope`).
582    publications: publications::Publications,
583    /// v6.1.4 logical-replication subscription catalog. Empty until
584    /// `CREATE SUBSCRIPTION` runs. Persistence rides the v4 envelope
585    /// trailer.
586    subscriptions: subscriptions::Subscriptions,
587    /// v6.2.0 — per-column statistics for the cost-based optimizer.
588    /// Populated by `ANALYZE`; queried via `spg_statistic` virtual
589    /// table. Persistence rides the v5 envelope trailer.
590    statistics: statistics::Statistics,
591    /// v6.3.0 — engine-level plan cache. Caches the post-`prepare()`
592    /// `Statement` keyed on SQL text. In-memory only — does NOT ride
593    /// the snapshot envelope (rebuilt on demand after restart).
594    plan_cache: plan_cache::PlanCache,
595    /// v6.5.1 — per-distinct-SQL execution stats. In-memory only,
596    /// surfaced via `spg_stat_query` virtual table. Updated by the
597    /// `execute_*` paths after a successful execute.
598    query_stats: query_stats::QueryStats,
599    /// v6.5.2 — connection-state provider callback. spg-server
600    /// registers a function at startup that snapshots its
601    /// per-pgwire-connection registry into `ActivityRow`s; engine
602    /// reads through it on every `SELECT * FROM spg_stat_activity`.
603    /// `None` ⇒ no-data (returns empty rows; matches the no_std
604    /// embedded callers that don't run pgwire).
605    activity_provider: Option<ActivityProvider>,
606    /// v6.5.3 — audit-chain provider + verifier. Same pattern as
607    /// activity_provider: spg-server registers both at startup;
608    /// engine reads through on `SELECT * FROM spg_audit_chain` and
609    /// `SELECT * FROM spg_audit_verify`. `None` ⇒ no-data.
610    audit_chain_provider: Option<AuditChainProvider>,
611    audit_verifier: Option<AuditVerifier>,
612    /// v6.5.6 — slow-query log threshold in microseconds. When set,
613    /// every successful execute whose elapsed exceeds the threshold
614    /// gets fed to the registered slow-query log callback (so
615    /// spg-server can emit a structured log line). Default `None`
616    /// = no slow-query logging.
617    slow_query_threshold_us: Option<u64>,
618    slow_query_logger: Option<SlowQueryLogger>,
619    /// v7.12.1 — session parameters set via `SET <name> = <value>`.
620    /// Only `default_text_search_config` is consumed by the engine
621    /// today (the FTS function dispatcher reads it when
622    /// `to_tsvector(text)` is called without an explicit config).
623    /// All other names are accepted + recorded so PG-dump output
624    /// loads, but have no behavioural effect.
625    session_params: BTreeMap<String, String>,
626    /// v7.12.7 — depth counter for trigger-emitted embedded SQL.
627    /// Each time the engine executes a `DeferredEmbeddedStmt` it
628    /// increments this; the recursive `execute_stmt_with_cancel`
629    /// inside that path checks against [`MAX_TRIGGER_RECURSION`]
630    /// to bound runaway cascades (trigger A's UPDATE on table B
631    /// fires trigger B which UPDATEs table A which fires trigger
632    /// A again…). Reset to 0 once the original DML returns.
633    trigger_recursion_depth: u32,
634    /// v7.14.0 — when `SET FOREIGN_KEY_CHECKS=0` is in effect
635    /// (mysqldump preamble), the FK existence + arity check at
636    /// CREATE TABLE time is deferred. FKs referencing a
637    /// not-yet-existing parent land in `pending_foreign_keys`
638    /// keyed by child table; `SET FOREIGN_KEY_CHECKS=1` drains
639    /// the queue and resolves each FK against the now-complete
640    /// catalog. Empty by default; the queue is drained on every
641    /// `RESET ALL` too.
642    foreign_key_checks: bool,
643    /// v7.16.2 — true on the temp Engine an outer
644    /// `exec_select_with_meta_views` builds, telling that
645    /// temp engine "stop short-circuiting into the meta-view
646    /// path — your catalog already has the materialised
647    /// tables; just run the regular SELECT." Without this we'd
648    /// infinite-loop since the meta-view name (e.g.
649    /// `__spg_info_columns`) still triggers
650    /// `select_references_meta_view`.
651    meta_views_materialised: bool,
652    pending_foreign_keys: Vec<(alloc::string::String, spg_sql::ast::ForeignKeyConstraint)>,
653}
654
655/// v7.12.7 — hard cap on nested trigger-emitted embedded SQL
656/// fires. 16 deep is well past anything a normal trigger graph
657/// uses while still preventing infinite-loop wedging.
658const MAX_TRIGGER_RECURSION: u32 = 16;
659
660/// v6.5.6 — callback signature for slow-query log emission. Called
661/// with `(sql, elapsed_us)` once per successful execute that crosses
662/// the threshold.
663pub type SlowQueryLogger = fn(&str, u64);
664
665/// v6.5.4 — synthesise a `CREATE TABLE` statement from catalog
666/// state. Round-trips through `Engine::execute` to recreate the
667/// same schema (sans data + indexes — indexes are emitted as a
668/// separate `CREATE INDEX` chain in `spg_database_ddl`).
669fn render_create_table(name: &str, columns: &[ColumnSchema]) -> String {
670    let mut out = alloc::format!("CREATE TABLE {name} (");
671    for (i, col) in columns.iter().enumerate() {
672        if i > 0 {
673            out.push_str(", ");
674        }
675        out.push_str(&col.name);
676        out.push(' ');
677        out.push_str(&render_data_type(col.ty));
678        if !col.nullable {
679            out.push_str(" NOT NULL");
680        }
681        if col.auto_increment {
682            out.push_str(" AUTO_INCREMENT");
683        }
684    }
685    out.push(')');
686    out
687}
688
689fn render_data_type(ty: DataType) -> String {
690    match ty {
691        DataType::SmallInt => "SMALLINT".into(),
692        DataType::Int => "INT".into(),
693        DataType::BigInt => "BIGINT".into(),
694        DataType::Float => "FLOAT".into(),
695        DataType::Text => "TEXT".into(),
696        DataType::Varchar(n) => alloc::format!("VARCHAR({n})"),
697        DataType::Char(n) => alloc::format!("CHAR({n})"),
698        DataType::Bool => "BOOL".into(),
699        DataType::Vector { dim, encoding } => match encoding {
700            spg_storage::VecEncoding::F32 => alloc::format!("VECTOR({dim})"),
701            spg_storage::VecEncoding::Sq8 => alloc::format!("VECTOR({dim}) USING SQ8"),
702            spg_storage::VecEncoding::F16 => alloc::format!("VECTOR({dim}) USING HALF"),
703        },
704        DataType::Numeric { precision, scale } => {
705            alloc::format!("NUMERIC({precision},{scale})")
706        }
707        DataType::Date => "DATE".into(),
708        DataType::Timestamp => "TIMESTAMP".into(),
709        DataType::Interval => "INTERVAL".into(),
710        DataType::Json => "JSON".into(),
711        DataType::Jsonb => "JSONB".into(),
712        DataType::Timestamptz => "TIMESTAMPTZ".into(),
713        DataType::Bytes => "BYTEA".into(),
714        DataType::TextArray => "TEXT[]".into(),
715        DataType::IntArray => "INT[]".into(),
716        DataType::BigIntArray => "BIGINT[]".into(),
717        DataType::TsVector => "TSVECTOR".into(),
718        DataType::TsQuery => "TSQUERY".into(),
719    }
720}
721
722/// v6.5.2 — one row of `spg_stat_activity`. Engine-public so
723/// spg-server can construct rows without re-exporting internal
724/// dispatch types.
725#[derive(Debug, Clone)]
726pub struct ActivityRow {
727    pub pid: u32,
728    pub user: String,
729    pub started_at_us: i64,
730    pub current_sql: String,
731    pub wait_event: String,
732    pub elapsed_us: i64,
733    pub in_transaction: bool,
734}
735
736/// v6.5.2 — provider callback type. Fresh snapshot returned each
737/// call; engine doesn't cache the slice.
738pub type ActivityProvider = fn() -> Vec<ActivityRow>;
739
740/// v6.5.3 — one row of `spg_audit_chain`. Engine-public so
741/// spg-server can construct rows directly from `AuditEntry`.
742#[derive(Debug, Clone)]
743pub struct AuditRow {
744    pub seq: i64,
745    pub ts_ms: i64,
746    pub prev_hash_hex: String,
747    pub entry_hash_hex: String,
748    pub sql: String,
749}
750
/// v6.5.3 — audit-chain provider. spg-server registers a fn pointer
/// that snapshots the audit log into `AuditRow`s.
pub type AuditChainProvider = fn() -> Vec<AuditRow>;
/// v6.5.3 — audit-chain verifier. spg-server registers a fn pointer
/// that verifies the audit log. Returns
/// `(verified_count, broken_at_seq)` — `broken_at_seq` is `-1` on a
/// clean chain.
pub type AuditVerifier = fn() -> (i64, i64);
757
758impl Engine {
    /// Creates an empty engine: a fresh catalog, no open transaction
    /// slots, no host callbacks installed (clock, salt RNG, activity /
    /// audit providers, slow-query logger), no row cap, empty user /
    /// publication / subscription / statistics stores, empty plan
    /// cache and query stats, and foreign-key checks enabled.
    ///
    /// `next_tx_id` starts at 1 because slot 0 is reserved for
    /// `IMPLICIT_TX`.
    pub fn new() -> Self {
        Self {
            catalog: Catalog::new(),
            tx_catalogs: BTreeMap::new(),
            current_tx: None,
            // Slot 0 is `IMPLICIT_TX`; allocated ids start after it.
            next_tx_id: 1,
            clock: None,
            salt_fn: None,
            max_query_rows: None,
            users: UserStore::new(),
            publications: publications::Publications::new(),
            subscriptions: subscriptions::Subscriptions::new(),
            statistics: statistics::Statistics::new(),
            plan_cache: plan_cache::PlanCache::new(),
            query_stats: query_stats::QueryStats::new(),
            activity_provider: None,
            audit_chain_provider: None,
            audit_verifier: None,
            slow_query_threshold_us: None,
            slow_query_logger: None,
            session_params: BTreeMap::new(),
            trigger_recursion_depth: 0,
            // FK checks are on unless a session turns them off.
            foreign_key_checks: true,
            meta_views_materialised: false,
            pending_foreign_keys: Vec::new(),
        }
    }
786
787    /// v7.11.0 — clone the engine's committed catalog + read-time
788    /// state into a frozen `CatalogSnapshot`. Cheap (`Catalog` is
789    /// backed by `PersistentVec`; cloning is O(log n) per table).
790    /// Subsequent writes to this engine are invisible to the
791    /// snapshot; the snapshot is self-contained and can be moved
792    /// to another thread for concurrent `execute_readonly_on_snapshot`
793    /// calls. The basis for [`AsyncReadHandle`] in spg-embedded-tokio
794    /// and any other read-fanout pattern.
795    #[must_use]
796    pub fn clone_snapshot(&self) -> CatalogSnapshot {
797        CatalogSnapshot {
798            catalog: self.active_catalog().clone(),
799            statistics: self.statistics.clone(),
800            clock: self.clock,
801            max_query_rows: self.max_query_rows,
802        }
803    }
804
    /// v7.11.1 — execute a read-only SQL statement against a
    /// `CatalogSnapshot` without touching this engine. Same
    /// semantics as `execute_readonly` but parameterised on the
    /// snapshot's catalog. Rejects DDL/DML the same way
    /// `execute_readonly` does. Static-on-Self so the caller can
    /// dispatch without holding an `Engine` borrow alongside the
    /// snapshot.
    ///
    /// Equivalent to calling
    /// `execute_readonly_on_snapshot_with_cancel` with a token that
    /// never cancels.
    pub fn execute_readonly_on_snapshot(
        snapshot: &CatalogSnapshot,
        sql: &str,
    ) -> Result<QueryResult, EngineError> {
        Self::execute_readonly_on_snapshot_with_cancel(snapshot, sql, CancelToken::none())
    }
818
819    /// v7.11.1 — `execute_readonly_on_snapshot` with cooperative
820    /// cancellation. Builds a transient `Engine` over the snapshot
821    /// state, runs `execute_readonly_with_cancel`, drops. The
822    /// transient engine is cheap to construct (no I/O; everything
823    /// is just struct moves) and lets the existing read path stay
824    /// untouched.
825    pub fn execute_readonly_on_snapshot_with_cancel(
826        snapshot: &CatalogSnapshot,
827        sql: &str,
828        cancel: CancelToken<'_>,
829    ) -> Result<QueryResult, EngineError> {
830        let transient = Engine {
831            catalog: snapshot.catalog.clone(),
832            statistics: snapshot.statistics.clone(),
833            clock: snapshot.clock,
834            max_query_rows: snapshot.max_query_rows,
835            ..Engine::default()
836        };
837        transient.execute_readonly_with_cancel(sql, cancel)
838    }
839
840    /// Construct an engine restored from a previously-snapshotted catalog
841    /// (see `snapshot()`).
842    pub fn restore(catalog: Catalog) -> Self {
843        Self {
844            catalog,
845            tx_catalogs: BTreeMap::new(),
846            current_tx: None,
847            next_tx_id: 1,
848            clock: None,
849            salt_fn: None,
850            max_query_rows: None,
851            users: UserStore::new(),
852            publications: publications::Publications::new(),
853            subscriptions: subscriptions::Subscriptions::new(),
854            statistics: statistics::Statistics::new(),
855            plan_cache: plan_cache::PlanCache::new(),
856            query_stats: query_stats::QueryStats::new(),
857            activity_provider: None,
858            audit_chain_provider: None,
859            audit_verifier: None,
860            slow_query_threshold_us: None,
861            slow_query_logger: None,
862            session_params: BTreeMap::new(),
863            trigger_recursion_depth: 0,
864            foreign_key_checks: true,
865            meta_views_materialised: false,
866            pending_foreign_keys: Vec::new(),
867        }
868    }
869
870    /// Restore an engine + user table from a v4.1 envelope produced
871    /// by `snapshot_with_users()`. Falls back to plain catalog-only
872    /// restore if the envelope magic isn't present (so v3.x snapshot
873    /// files still load). v6.1.2 adds the optional publications
874    /// trailer (envelope v3); a v1/v2 envelope deserialises to an
875    /// empty publication table.
876    pub fn restore_envelope(buf: &[u8]) -> Result<Self, EngineError> {
877        match split_envelope(buf) {
878            EnvelopeParse::Pair {
879                catalog: catalog_bytes,
880                users: user_bytes,
881                publications: pub_bytes,
882                subscriptions: sub_bytes,
883                statistics: stats_bytes,
884            } => {
885                let catalog = Catalog::deserialize(catalog_bytes).map_err(EngineError::Storage)?;
886                let users = users::deserialize_users(user_bytes)
887                    .map_err(|e| EngineError::Unsupported(alloc::format!("users restore: {e}")))?;
888                let publications = match pub_bytes {
889                    Some(b) => publications::Publications::deserialize(b).map_err(|e| {
890                        EngineError::Unsupported(alloc::format!("publications restore: {e:?}"))
891                    })?,
892                    None => publications::Publications::new(),
893                };
894                let subscriptions = match sub_bytes {
895                    Some(b) => subscriptions::Subscriptions::deserialize(b).map_err(|e| {
896                        EngineError::Unsupported(alloc::format!("subscriptions restore: {e:?}"))
897                    })?,
898                    None => subscriptions::Subscriptions::new(),
899                };
900                let statistics = match stats_bytes {
901                    Some(b) => statistics::Statistics::deserialize(b).map_err(|e| {
902                        EngineError::Unsupported(alloc::format!("statistics restore: {e:?}"))
903                    })?,
904                    None => statistics::Statistics::new(),
905                };
906                Ok(Self {
907                    catalog,
908                    tx_catalogs: BTreeMap::new(),
909                    current_tx: None,
910                    next_tx_id: 1,
911                    clock: None,
912                    salt_fn: None,
913                    max_query_rows: None,
914                    users,
915                    publications,
916                    subscriptions,
917                    statistics,
918                    plan_cache: plan_cache::PlanCache::new(),
919                    query_stats: query_stats::QueryStats::new(),
920                    activity_provider: None,
921                    audit_chain_provider: None,
922                    audit_verifier: None,
923                    slow_query_threshold_us: None,
924                    slow_query_logger: None,
925                    session_params: BTreeMap::new(),
926                    trigger_recursion_depth: 0,
927                    foreign_key_checks: true,
928                    meta_views_materialised: false,
929                    pending_foreign_keys: Vec::new(),
930                })
931            }
932            EnvelopeParse::CrcMismatch { expected, computed } => {
933                Err(EngineError::Storage(StorageError::Corrupt(alloc::format!(
934                    "snapshot envelope CRC32 mismatch (expected={expected:#010x}, computed={computed:#010x})"
935                ))))
936            }
937            EnvelopeParse::Bare => {
938                let catalog = Catalog::deserialize(buf).map_err(EngineError::Storage)?;
939                Ok(Self::restore(catalog))
940            }
941        }
942    }
943
    /// Borrow the engine's user store (read-only).
    pub const fn users(&self) -> &UserStore {
        &self.users
    }
947
948    /// `salt` is supplied by the caller (the host has a random
949    /// source; the engine is `no_std`). Caller should pass a fresh
950    /// 16-byte random value per user.
951    pub fn create_user(
952        &mut self,
953        name: &str,
954        password: &str,
955        role: Role,
956        salt: [u8; 16],
957    ) -> Result<(), UserError> {
958        self.users.create(name, password, role, salt)?;
959        // v4.8: also derive SCRAM-SHA-256 secrets so PG-wire SASL
960        // auth can verify without re-running PBKDF2 per attempt.
961        // Uses a fresh salt from the host RNG (falls back to a
962        // deterministic per-username salt when no RNG is wired, same
963        // as the legacy hash path).
964        let scram_salt = self.salt_fn.map_or_else(
965            || {
966                let mut s = [0u8; users::SCRAM_SALT_LEN];
967                let digest = spg_crypto::hash(name.as_bytes());
968                // Use bytes 16..32 of BLAKE3 so we don't reuse the
969                // exact same fallback salt as the BLAKE3 hash path.
970                s.copy_from_slice(&digest[16..32]);
971                s
972            },
973            |f| f(),
974        );
975        self.users
976            .enable_scram(name, password, scram_salt, users::SCRAM_DEFAULT_ITERS)?;
977        Ok(())
978    }
979
    /// Remove the user `name` by delegating to `UserStore::drop`;
    /// its `UserError` (if any) is returned unchanged.
    pub fn drop_user(&mut self, name: &str) -> Result<(), UserError> {
        self.users.drop(name)
    }
983
    /// Check `password` for user `name` by delegating to
    /// `UserStore::verify` and returning its result unchanged.
    ///
    /// NOTE(review): presumably `Some(role)` on a successful match
    /// and `None` for an unknown user or wrong password — confirm in
    /// `users.rs`.
    pub fn verify_user(&self, name: &str, password: &str) -> Option<Role> {
        self.users.verify(name, password)
    }
987
    /// Builder: attach a wall clock so `NOW()` / `CURRENT_TIMESTAMP` /
    /// `CURRENT_DATE` evaluate to a real value instead of erroring out.
    ///
    /// Consumes the engine and returns it with `clock` set, replacing
    /// any clock attached earlier.
    #[must_use]
    pub const fn with_clock(mut self, clock: ClockFn) -> Self {
        self.clock = Some(clock);
        self
    }
995
    /// Builder: attach an OS-backed RNG for per-user password salts.
    /// The host (`spg-server`) typically wires this to `/dev/urandom`.
    ///
    /// Consumes the engine and returns it with `salt_fn` set,
    /// replacing any salt source attached earlier.
    #[must_use]
    pub const fn with_salt_fn(mut self, f: SaltFn) -> Self {
        self.salt_fn = Some(f);
        self
    }
1003
    /// Builder: cap the number of rows a single SELECT may return.
    /// Exceeding the cap raises `EngineError::RowLimitExceeded` —
    /// the bound is checked inside the executor so a runaway
    /// catalog scan can't allocate millions of rows before the
    /// server gets a chance to reject the result.
    ///
    /// Consumes the engine and returns it with the cap set to `n`,
    /// replacing any cap configured earlier.
    #[must_use]
    pub const fn with_max_query_rows(mut self, n: usize) -> Self {
        self.max_query_rows = Some(n);
        self
    }
1014
    /// The *committed* catalog. Note: during a transaction this returns the
    /// pre-TX state — `SELECT` inside a TX goes through `execute()` and reads
    /// the shadow. Tests that inspect outside-TX state should use this.
    ///
    /// Always returns the `catalog` field itself; it never consults
    /// `tx_catalogs`.
    pub const fn catalog(&self) -> &Catalog {
        &self.catalog
    }
1021
1022    /// Serialize the *committed* catalog to bytes. v0.6 was full-snapshot; v0.9
1023    /// adds the rule that an open TX's shadow is never snapshotted — only the
1024    /// post-COMMIT state is persisted. v4.1 wraps the catalog in an envelope
1025    /// when there are users to persist; an empty user table snapshots as the
1026    /// bare catalog format (backwards-compat with v3.x readers). v6.1.2
1027    /// adds publications to the envelope condition: either non-empty
1028    /// users OR non-empty publications now triggers the envelope path.
1029    pub fn snapshot(&self) -> Vec<u8> {
1030        if self.users.is_empty()
1031            && self.publications.is_empty()
1032            && self.subscriptions.is_empty()
1033            && self.statistics.is_empty()
1034        {
1035            self.catalog.serialize()
1036        } else {
1037            build_envelope(
1038                &self.catalog.serialize(),
1039                &users::serialize_users(&self.users),
1040                &self.publications.serialize(),
1041                &self.subscriptions.serialize(),
1042                &self.statistics.serialize(),
1043            )
1044        }
1045    }
1046
    /// True when at least one TX slot is in flight. v4.41.1 runtime
    /// invariant: at most one slot active at a time (dispatch holds
    /// `engine.write()` across the entire wrap). v4.42 will let this
    /// return true with multiple slots concurrently.
    ///
    /// The answer is derived from `tx_catalogs` alone; `current_tx`
    /// is not consulted.
    pub fn in_transaction(&self) -> bool {
        !self.tx_catalogs.is_empty()
    }
1054
1055    /// v4.41.1 allocate a fresh TX handle. Used by spg-server dispatch
1056    /// to scope each implicit-wrap BEGIN..stmt..COMMIT to its own slot
1057    /// in `tx_catalogs`. v4.42 — the commit-barrier leader allocates
1058    /// one of these per task in its group, runs `BEGIN`+sql+`COMMIT`
1059    /// sequentially under a single `engine.write()` so each task's
1060    /// mutations accumulate into shared state, then either keeps the
1061    /// accumulated state (fsync OK) or restores the pre-image via
1062    /// `replace_catalog` (fsync err).
1063    pub fn alloc_tx_id(&mut self) -> TxId {
1064        let id = TxId(self.next_tx_id);
1065        self.next_tx_id = self.next_tx_id.saturating_add(1);
1066        id
1067    }
1068
    /// v4.42 — atomically replace the live catalog. Used by the
    /// commit-barrier leader to roll back a group whose batched
    /// fsync failed: the leader snapshots `engine.catalog().clone()`
    /// (O(1) Arc bump after the v4.39/v4.40 persistent migration)
    /// at group start, sequentially applies each task's BEGIN+sql+
    /// COMMIT under the same write lock to accumulate mutations
    /// into shared state, batches the WAL bytes, fsyncs once, and
    /// on failure calls this with the pre-image to undo every
    /// task in the group at once.
    ///
    /// **Does NOT touch `tx_catalogs` / `current_tx`.** Any
    /// explicit-TX slot from a concurrent client (created via the
    /// legacy `IMPLICIT_TX`-less dispatch path or via the future
    /// MVCC-readers v5+ work) has its own snapshot baked into the
    /// slot — restoring `self.catalog` to the pre-image leaves
    /// those slots untouched, exactly as they were when the leader
    /// took the lock. The leader's own implicit-TX slots are all
    /// already discarded (`exec_commit` removed them as each
    /// task's COMMIT ran) by the time this is reached.
    pub fn replace_catalog(&mut self, catalog: Catalog) {
        // Plain field assignment: the previous committed catalog is
        // dropped here and nothing else on the engine is reset.
        self.catalog = catalog;
    }
1091
1092    /// v6.7.0 — public shim around `Catalog::freeze_oldest_to_cold`
1093    /// so tests + the spg-server freezer can drive a freeze without
1094    /// reaching into the private `active_catalog_mut`. v6.7.4
1095    /// parallel freezer will build on this surface.
1096    ///
1097    /// Marks the table's cached `cold_row_count` stale because the
1098    /// freeze added cold locators that ANALYZE hasn't yet refreshed.
1099    pub fn freeze_oldest_to_cold(
1100        &mut self,
1101        table_name: &str,
1102        index_name: &str,
1103        max_rows: usize,
1104    ) -> Result<spg_storage::FreezeReport, EngineError> {
1105        let report = self
1106            .active_catalog_mut()
1107            .freeze_oldest_to_cold(table_name, index_name, max_rows)
1108            .map_err(EngineError::Storage)?;
1109        if let Some(t) = self.active_catalog_mut().get_mut(table_name) {
1110            t.mark_cold_row_count_stale();
1111        }
1112        Ok(report)
1113    }
1114
1115    /// v6.7.5 — public shim used by the spg-server follower's
1116    /// segment-forwarding receiver. Registers a cold-tier segment
1117    /// at a specific id (the master's id, as transmitted on the
1118    /// wire) so the follower's BTree-Cold locators stay byte-
1119    /// identical with the master's. Wraps
1120    /// `Catalog::load_segment_bytes_at` under the standard
1121    /// clone-mutate-replace pattern.
1122    ///
1123    /// Returns `Ok(())` on success **and** on the "slot already
1124    /// occupied" case — a follower mid-reconnect may receive a
1125    /// segment chunk for a segment_id it already has on disk
1126    /// (forwarded last session); the caller should treat that
1127    /// path as a no-op rather than a fatal error.
1128    pub fn receive_cold_segment(
1129        &mut self,
1130        segment_id: u32,
1131        bytes: Vec<u8>,
1132    ) -> Result<(), EngineError> {
1133        let mut new_cat = self.catalog.clone();
1134        match new_cat.load_segment_bytes_at(segment_id, bytes) {
1135            Ok(()) => {
1136                self.replace_catalog(new_cat);
1137                Ok(())
1138            }
1139            Err(StorageError::Corrupt(msg)) if msg.contains("already occupied") => Ok(()),
1140            Err(e) => Err(EngineError::Storage(e)),
1141        }
1142    }
1143
1144    /// v6.7.3 — public shim around `Catalog::compact_cold_segments`
1145    /// driving every BTree index on every user table. Returns one
1146    /// `(table, index, report)` triple for each merge that
1147    /// actually happened (no-op (table, index) pairs are filtered
1148    /// out so callers can size persist-side work to the live
1149    /// merges). Caller is responsible for persisting each
1150    /// `report.merged_segment_bytes` and updating the on-disk
1151    /// segment registry; engine layer is no_std and never
1152    /// touches disk.
1153    ///
1154    /// Marks every touched table's cached `cold_row_count` stale
1155    /// — compaction GC'd some shadowed rows, so the count must be
1156    /// re-derived on the next ANALYZE.
1157    pub fn compact_cold_segments_with_target(
1158        &mut self,
1159        target_segment_bytes: u64,
1160    ) -> Result<Vec<(String, String, CompactReport)>, EngineError> {
1161        let table_names = self.active_catalog().table_names();
1162        let mut reports: Vec<(String, String, CompactReport)> = Vec::new();
1163        for tname in table_names {
1164            if is_internal_table_name(&tname) {
1165                continue;
1166            }
1167            let idx_names: Vec<String> = {
1168                let Some(t) = self.active_catalog().get(&tname) else {
1169                    continue;
1170                };
1171                t.indices()
1172                    .iter()
1173                    .filter(|i| matches!(i.kind, IndexKind::BTree(_)))
1174                    .map(|i| i.name.clone())
1175                    .collect()
1176            };
1177            for iname in idx_names {
1178                let report = self
1179                    .active_catalog_mut()
1180                    .compact_cold_segments(&tname, &iname, target_segment_bytes)
1181                    .map_err(EngineError::Storage)?;
1182                if report.merged_segment_id.is_some() {
1183                    if let Some(t) = self.active_catalog_mut().get_mut(&tname) {
1184                        t.mark_cold_row_count_stale();
1185                    }
1186                    reports.push((tname.clone(), iname, report));
1187                }
1188            }
1189        }
1190        Ok(reports)
1191    }
1192
1193    fn active_catalog(&self) -> &Catalog {
1194        match self.current_tx {
1195            Some(t) => self
1196                .tx_catalogs
1197                .get(&t)
1198                .map_or(&self.catalog, |s| &s.catalog),
1199            None => &self.catalog,
1200        }
1201    }
1202
1203    /// v7.12.4 — snapshot every row-level trigger on `table` that
1204    /// fires for `event` (`"INSERT"` / `"UPDATE"` / `"DELETE"`) at
1205    /// the given `timing` (`"BEFORE"` / `"AFTER"`), and clone its
1206    /// referenced function definition. Returned as a vec of owned
1207    /// `FunctionDef` so the row-write loop can fire them without
1208    /// holding a borrow on the catalog (which would conflict with
1209    /// the table.insert / update_row / delete mutable borrows).
1210    /// v7.16.2 — top-level DO block executor. Walks the
1211    /// PlPgSqlBlock via [`triggers::execute_do_block_top_level`],
1212    /// then runs each collected EmbeddedSql statement through
1213    /// the engine's regular execute path (NOT deferred — DO is
1214    /// outside any row-write borrow). Errors from any step
1215    /// abort the block and propagate verbatim.
1216    /// v7.16.2 — resolve every subquery inside a PlPgSqlBlock's
1217    /// expression slots so the downstream trigger-flavoured
1218    /// evaluator (which expects pre-resolved Expr::Literal /
1219    /// Binary chains) doesn't trip on raw Exists/ScalarSubquery
1220    /// nodes. Walks IF conditions, Assign values, RAISE args.
1221    /// EmbeddedSql statements re-enter the engine for execution
1222    /// later so their subqueries get the normal SELECT-side
1223    /// resolution.
1224    fn resolve_plpgsql_block_subqueries(
1225        &self,
1226        block: &mut spg_sql::ast::PlPgSqlBlock,
1227        cancel: CancelToken<'_>,
1228    ) -> Result<(), EngineError> {
1229        for d in &mut block.declarations {
1230            if let Some(e) = &mut d.default {
1231                self.resolve_expr_subqueries(e, cancel)?;
1232            }
1233        }
1234        self.resolve_plpgsql_stmts_subqueries(&mut block.statements, cancel)
1235    }
1236
1237    fn resolve_plpgsql_stmts_subqueries(
1238        &self,
1239        stmts: &mut [spg_sql::ast::PlPgSqlStmt],
1240        cancel: CancelToken<'_>,
1241    ) -> Result<(), EngineError> {
1242        use spg_sql::ast::PlPgSqlStmt;
1243        for stmt in stmts {
1244            match stmt {
1245                PlPgSqlStmt::Assign { value, .. } => {
1246                    self.resolve_expr_subqueries(value, cancel)?;
1247                }
1248                PlPgSqlStmt::Return(spg_sql::ast::ReturnTarget::Expr(e)) => {
1249                    self.resolve_expr_subqueries(e, cancel)?;
1250                }
1251                PlPgSqlStmt::Return(_) => {}
1252                PlPgSqlStmt::If {
1253                    branches,
1254                    else_branch,
1255                } => {
1256                    for (cond, body) in branches.iter_mut() {
1257                        self.resolve_expr_subqueries(cond, cancel)?;
1258                        self.resolve_plpgsql_stmts_subqueries(body, cancel)?;
1259                    }
1260                    self.resolve_plpgsql_stmts_subqueries(else_branch, cancel)?;
1261                }
1262                PlPgSqlStmt::Raise { args, .. } => {
1263                    for a in args {
1264                        self.resolve_expr_subqueries(a, cancel)?;
1265                    }
1266                }
1267                PlPgSqlStmt::EmbeddedSql(_) => {
1268                    // Embedded SQL goes back through execute_stmt
1269                    // _with_cancel which runs the SELECT-side
1270                    // resolver itself; nothing to do here.
1271                }
1272                PlPgSqlStmt::SelectInto { body, .. } => {
1273                    // SELECT INTO runs through Engine::execute
1274                    // when reached, so subquery resolution
1275                    // happens via the normal SELECT-side path.
1276                    // Still walk for nested subqueries inside
1277                    // the SELECT body so eval doesn't trip.
1278                    self.resolve_select_subqueries(body, cancel)?;
1279                }
1280            }
1281        }
1282        Ok(())
1283    }
1284
1285    fn exec_do_block(
1286        &mut self,
1287        body: spg_sql::ast::PlPgSqlBlock,
1288    ) -> Result<QueryResult, EngineError> {
1289        // v7.16.2 — pre-resolve every subquery the body's
1290        // expressions reach. `eval::eval_expr` errors on
1291        // unresolved Exists/ScalarSubquery/InSubquery; the
1292        // top-level SELECT path runs `resolve_select_subqueries`
1293        // for the caller — for plpgsql we have to do the
1294        // equivalent before the body walker runs. Catches the
1295        // mailrs idiom `IF EXISTS (SELECT 1 FROM
1296        // information_schema.columns WHERE …) THEN …`.
1297        let mut body = body;
1298        self.resolve_plpgsql_block_subqueries(&mut body, CancelToken::none())?;
1299        let dts = self
1300            .session_param("default_text_search_config")
1301            .map(String::from);
1302        // v7.16.2 — SELECT … INTO resolver. The walker calls
1303        // this synchronously when it hits a SelectInto stmt
1304        // so the IF / locals scope sees the result before the
1305        // next statement. Body walks for trigger paths (no
1306        // resolver) error loudly on SelectInto.
1307        // SAFETY: the closure shares this engine borrow with
1308        // the walker, but the walker only borrows for the
1309        // duration of `execute_do_block_top_level` and doesn't
1310        // reach back into the engine through any other path —
1311        // so the recursive `&mut` is sound. We use a `RefCell`
1312        // for interior mutability since the closure is
1313        // Fn-shaped.
1314        let engine_cell = core::cell::RefCell::new(&mut *self);
1315        let resolver_fn =
1316            |stmt: &spg_sql::ast::Statement| -> Result<Value, triggers::TriggerError> {
1317                let mut eng = engine_cell.borrow_mut();
1318                let r = eng
1319                    .execute_stmt_with_cancel(stmt.clone(), CancelToken::none())
1320                    .map_err(|e| triggers::TriggerError::EvalFailed {
1321                        function: "DO".into(),
1322                        cause: eval::EvalError::TypeMismatch {
1323                            detail: alloc::format!("SELECT … INTO failed: {e}"),
1324                        },
1325                    })?;
1326                match r {
1327                    QueryResult::Rows { rows, .. } => match rows.into_iter().next() {
1328                        Some(row) => Ok(row.values.into_iter().next().unwrap_or(Value::Null)),
1329                        None => Ok(Value::Null),
1330                    },
1331                    _ => Err(triggers::TriggerError::EvalFailed {
1332                        function: "DO".into(),
1333                        cause: eval::EvalError::TypeMismatch {
1334                            detail: "SELECT … INTO body must be a SELECT".into(),
1335                        },
1336                    }),
1337                }
1338            };
1339        let collected =
1340            triggers::execute_do_block_top_level(&body, dts.as_deref(), Some(&resolver_fn))
1341                .map_err(|e| {
1342                    EngineError::Storage(StorageError::Corrupt(alloc::format!("DO: {e}")))
1343                })?;
1344        // engine_cell goes out of scope here, releasing the &mut self borrow
1345        // Run each embedded statement against the engine. The
1346        // statements were already substitute-walked for NEW/OLD/
1347        // locals (those evaluate to engine literals before they
1348        // land here) so dispatch is plain execute_stmt_with_cancel.
1349        for stmt in collected {
1350            // v7.16.2 — preserve current_tx wrap so an outer
1351            // BEGIN/COMMIT around a DO block keeps the
1352            // EmbeddedSql writes inside that same tx slot.
1353            self.execute_stmt_with_cancel(stmt, CancelToken::none())?;
1354        }
1355        Ok(QueryResult::CommandOk {
1356            affected: 0,
1357            modified_catalog: !self.in_transaction(),
1358        })
1359    }
1360
1361    fn snapshot_row_triggers(
1362        &self,
1363        table: &str,
1364        event: &str,
1365        timing: &str,
1366    ) -> Vec<spg_storage::FunctionDef> {
1367        let cat = self.active_catalog();
1368        cat.triggers()
1369            .iter()
1370            .filter(|t| {
1371                // v7.16.1 — skip disabled triggers (mailrs
1372                // round-9 A.2.b — pg_dump --disable-triggers).
1373                t.enabled
1374                    && t.table == table
1375                    && t.timing.eq_ignore_ascii_case(timing)
1376                    && t.for_each.eq_ignore_ascii_case("row")
1377                    && t.events.iter().any(|e| e.eq_ignore_ascii_case(event))
1378            })
1379            .filter_map(|t| cat.functions().get(&t.function).cloned())
1380            .collect()
1381    }
1382
1383    /// v7.13.0 — UPDATE-side snapshot that pairs each trigger's
1384    /// function with its `UPDATE OF cols` filter (mailrs round-5
1385    /// G7). Empty filter Vec means "fire unconditionally", matching
1386    /// the v7.12 behaviour.
1387    fn snapshot_update_row_triggers(
1388        &self,
1389        table: &str,
1390        timing: &str,
1391    ) -> Vec<(spg_storage::FunctionDef, Vec<String>)> {
1392        let cat = self.active_catalog();
1393        cat.triggers()
1394            .iter()
1395            .filter(|t| {
1396                // v7.16.1 — skip disabled triggers.
1397                t.enabled
1398                    && t.table == table
1399                    && t.timing.eq_ignore_ascii_case(timing)
1400                    && t.for_each.eq_ignore_ascii_case("row")
1401                    && t.events.iter().any(|e| e.eq_ignore_ascii_case("UPDATE"))
1402            })
1403            .filter_map(|t| {
1404                cat.functions()
1405                    .get(&t.function)
1406                    .cloned()
1407                    .map(|fd| (fd, t.update_columns.clone()))
1408            })
1409            .collect()
1410    }
1411
1412    /// v7.12.7 — drain the trigger-emitted embedded SQL queue.
1413    /// Called by the INSERT / UPDATE / DELETE executors after
1414    /// their main row-write loop returns. Each statement runs
1415    /// inside the same cancel scope as the firing DML and bumps
1416    /// the recursion counter; nested embedded SQL beyond
1417    /// [`MAX_TRIGGER_RECURSION`] errors with a clear message so
1418    /// a trigger-graph cycle surfaces as a query failure instead
1419    /// of stack-blowing the engine.
1420    fn execute_deferred_trigger_stmts(
1421        &mut self,
1422        deferred: Vec<triggers::DeferredEmbeddedStmt>,
1423        cancel: CancelToken<'_>,
1424    ) -> Result<(), EngineError> {
1425        for d in deferred {
1426            if self.trigger_recursion_depth >= MAX_TRIGGER_RECURSION {
1427                return Err(EngineError::Storage(StorageError::Corrupt(alloc::format!(
1428                    "trigger embedded SQL recursion depth {} exceeded (trigger function \
1429                     {:?} would push past the {} cap — check for trigger cycles)",
1430                    self.trigger_recursion_depth,
1431                    d.function,
1432                    MAX_TRIGGER_RECURSION,
1433                ))));
1434            }
1435            self.trigger_recursion_depth += 1;
1436            let res = self.execute_stmt_with_cancel(d.stmt, cancel);
1437            self.trigger_recursion_depth -= 1;
1438            res?;
1439        }
1440        Ok(())
1441    }
1442
1443    fn active_catalog_mut(&mut self) -> &mut Catalog {
1444        let tx = self.current_tx;
1445        match tx {
1446            Some(t) => match self.tx_catalogs.get_mut(&t) {
1447                Some(s) => &mut s.catalog,
1448                None => &mut self.catalog,
1449            },
1450            None => &mut self.catalog,
1451        }
1452    }
1453
1454    /// Read-only execute path. Succeeds for `SELECT` / `SHOW TABLES`
1455    /// / `SHOW COLUMNS`; returns `EngineError::WriteRequired` for
1456    /// every other statement, so the caller can fall through to the
1457    /// `&mut self` `execute` path under a write lock. Engine state is
1458    /// not mutated even on the success path (`rewrite_clock_calls`
1459    /// and `resolve_order_by_position` both mutate the locally-owned
1460    /// AST, not `self`).
1461    ///
1462    /// **v4.0 concurrency**: this is the entry point the server takes
1463    /// under an `RwLock::read()` so multiple `SELECT` clients run in
1464    /// parallel without serialising on a single mutex.
1465    pub fn execute_readonly(&self, sql: &str) -> Result<QueryResult, EngineError> {
1466        self.execute_readonly_with_cancel(sql, CancelToken::none())
1467    }
1468
1469    /// v4.5 — read path with cooperative cancellation. Token's
1470    /// `is_cancelled` is checked at the start (so a watchdog that
1471    /// already fired returns Cancelled immediately) and at row-loop
1472    /// checkpoints inside `exec_select`. SHOW paths are O(small) and
1473    /// don't bother checking.
1474    pub fn execute_readonly_with_cancel(
1475        &self,
1476        sql: &str,
1477        cancel: CancelToken<'_>,
1478    ) -> Result<QueryResult, EngineError> {
1479        cancel.check()?;
1480        let mut stmt = parser::parse_statement(sql)?;
1481        let now_micros = self.clock.map(|f| f());
1482        rewrite_clock_calls(&mut stmt, now_micros);
1483        if let Statement::Select(s) = &mut stmt {
1484            resolve_order_by_position(s);
1485            // v6.2.3 — cost-based JOIN reorder (read path).
1486            reorder::reorder_joins(s, &self.catalog, &self.statistics);
1487        }
1488        let result = match stmt {
1489            Statement::Select(s) => self.exec_select_cancel(&s, cancel),
1490            Statement::ShowTables => Ok(self.exec_show_tables()),
1491            Statement::ShowColumns(table) => self.exec_show_columns(&table),
1492            Statement::ShowUsers => Ok(self.exec_show_users()),
1493            Statement::ShowPublications => Ok(self.exec_show_publications()),
1494            Statement::ShowSubscriptions => Ok(self.exec_show_subscriptions()),
1495            Statement::WaitForWalPosition { .. } => Err(EngineError::Unsupported(
1496                "WAIT FOR WAL POSITION must be handled by the server layer".into(),
1497            )),
1498            Statement::Explain(e) => self.exec_explain(&e, cancel),
1499            _ => Err(EngineError::WriteRequired),
1500        };
1501        self.enforce_row_limit(result)
1502    }
1503
1504    /// v4.2: cap result-set size. Applied after the executor
1505    /// materialises rows but before they leave the engine — wrapping
1506    /// every Rows-returning exec_* function would scatter the check.
1507    fn enforce_row_limit(
1508        &self,
1509        result: Result<QueryResult, EngineError>,
1510    ) -> Result<QueryResult, EngineError> {
1511        if let (Ok(QueryResult::Rows { rows, .. }), Some(cap)) = (&result, self.max_query_rows)
1512            && rows.len() > cap
1513        {
1514            return Err(EngineError::RowLimitExceeded(cap));
1515        }
1516        result
1517    }
1518
1519    pub fn execute(&mut self, sql: &str) -> Result<QueryResult, EngineError> {
1520        self.execute_in_with_cancel(sql, IMPLICIT_TX, CancelToken::none())
1521    }
1522
1523    /// v4.5 — write path with cooperative cancellation. Same dispatch
1524    /// as `execute_in_with_cancel(sql, IMPLICIT_TX, cancel)`. Kept as
1525    /// a separate entry point for backward-compat with the v4.5
1526    /// public API.
1527    pub fn execute_with_cancel(
1528        &mut self,
1529        sql: &str,
1530        cancel: CancelToken<'_>,
1531    ) -> Result<QueryResult, EngineError> {
1532        self.execute_in_with_cancel(sql, IMPLICIT_TX, cancel)
1533    }
1534
1535    /// v4.41.1 multi-slot write entry. Routes `sql` through the TX
1536    /// slot identified by `tx_id` so spg-server dispatch can scope
1537    /// each implicit-wrap BEGIN..stmt..COMMIT to its own slot in
1538    /// `tx_catalogs`. `IMPLICIT_TX` is the legacy single-slot path
1539    /// every other caller (engine self-tests, replay, spg-embedded)
1540    /// implicitly takes via `execute()` / `execute_with_cancel()`.
1541    pub fn execute_in(&mut self, sql: &str, tx_id: TxId) -> Result<QueryResult, EngineError> {
1542        self.execute_in_with_cancel(sql, tx_id, CancelToken::none())
1543    }
1544
1545    /// v4.41.1 write path with cooperative cancellation + explicit TX
1546    /// scope. Sets `self.current_tx` for the duration of the call so
1547    /// every `exec_*` helper transparently sees its TX's shadow
1548    /// catalog and savepoint stack; restores on exit so the field is
1549    /// only valid mid-call (no leakage across calls).
1550    pub fn execute_in_with_cancel(
1551        &mut self,
1552        sql: &str,
1553        tx_id: TxId,
1554        cancel: CancelToken<'_>,
1555    ) -> Result<QueryResult, EngineError> {
1556        let saved = self.current_tx;
1557        self.current_tx = Some(tx_id);
1558        let result = self.execute_inner_with_cancel(sql, cancel);
1559        self.current_tx = saved;
1560        result
1561    }
1562
1563    /// v6.1.1 — parse and pre-process a SQL string ONCE so the
1564    /// resulting [`Statement`] can be cached and re-executed via
1565    /// [`Engine::execute_prepared`]. Returns the same `Statement`
1566    /// the simple-query path would synthesise internally (clock
1567    /// rewrites + ORDER BY position-ref resolution applied at
1568    /// prepare time, since both are session-independent). The
1569    /// `$N` placeholders in the SQL stay as `Expr::Placeholder(n)`
1570    /// nodes; they're resolved to concrete values per-call by
1571    /// `execute_prepared`'s substitution walk.
1572    ///
1573    /// Pgwire's `Parse` (P) message lands here.
1574    pub fn prepare(&self, sql: &str) -> Result<Statement, ParseError> {
1575        let mut stmt = parser::parse_statement(sql)?;
1576        let now_micros = self.clock.map(|f| f());
1577        rewrite_clock_calls(&mut stmt, now_micros);
1578        if let Statement::Select(s) = &mut stmt {
1579            // v6.4.1 — expand `GROUP BY ALL` to every non-aggregate
1580            // SELECT-list item BEFORE position / alias resolution so
1581            // downstream passes see the explicit list.
1582            expand_group_by_all(s);
1583            resolve_order_by_position(s);
1584            // v6.2.3 — cost-based JOIN reorder. No-op for
1585            // single-table FROMs or any non-INNER join shape.
1586            reorder::reorder_joins(s, &self.catalog, &self.statistics);
1587        }
1588        Ok(stmt)
1589    }
1590
1591    /// v6.3.0 — cached prepare. Returns a cloned `Statement` from
1592    /// the plan cache on hit, runs the full `prepare()` path on miss
1593    /// and inserts the resulting plan before returning. Skipping the
1594    /// parse + JOIN-reorder pipeline on hit is the dominant win for
1595    /// JDBC / sqlx / pgx clients that reuse the same SQL string.
1596    ///
1597    /// Returns a cloned `Statement` (not a borrow) because the
1598    /// pgwire layer owns its `PreparedStmt` map per-session and the
1599    /// engine-level cache must stay available for other sessions.
1600    /// Clone cost on a 5-table JOIN AST is well under the parse cost
1601    /// it replaces.
1602    pub fn prepare_cached(&mut self, sql: &str) -> Result<Statement, ParseError> {
1603        // v6.3.1 — version-aware lookup. If the cached plan was
1604        // prepared before the most recent ANALYZE, evict and replan.
1605        let current_version = self.statistics.version();
1606        if let Some(plan) = self.plan_cache.get(sql) {
1607            if plan.statistics_version == current_version {
1608                return Ok(plan.stmt.clone());
1609            }
1610            // Stale entry — fall through to evict + re-prepare.
1611        }
1612        self.plan_cache.evict(sql);
1613        let stmt = self.prepare(sql)?;
1614        let source_tables = plan_cache::collect_source_tables(&stmt);
1615        let plan = plan_cache::PreparedPlan {
1616            stmt: stmt.clone(),
1617            statistics_version: current_version,
1618            source_tables,
1619            describe_columns: alloc::vec::Vec::new(),
1620        };
1621        self.plan_cache.insert(String::from(sql), plan);
1622        Ok(stmt)
1623    }
1624
1625    /// v6.3.0 — read-only accessor for tests and v6.3.1 invalidation.
1626    pub fn plan_cache(&self) -> &plan_cache::PlanCache {
1627        &self.plan_cache
1628    }
1629
1630    /// v6.3.0 — mutable accessor for v6.3.1 invalidation hooks.
1631    pub fn plan_cache_mut(&mut self) -> &mut plan_cache::PlanCache {
1632        &mut self.plan_cache
1633    }
1634
1635    /// v6.3.3 — Describe a prepared `Statement` without executing.
1636    /// Returns `(parameter_oids, output_columns)`. Empty
1637    /// `output_columns` means the statement has no row-producing
1638    /// shape we could resolve here (JOIN, subquery, non-SELECT, …)
1639    /// — pgwire layer maps that to a `NoData` reply.
1640    pub fn describe_prepared(&self, stmt: &Statement) -> (Vec<u32>, Vec<ColumnSchema>) {
1641        describe::describe_prepared(stmt, self.active_catalog())
1642    }
1643
1644    /// v6.1.1 — execute a [`Statement`] previously returned by
1645    /// [`Engine::prepare`], substituting `Expr::Placeholder(n)`
1646    /// nodes for the corresponding [`Value`] in `params` (1-based
1647    /// per PG: `$1` → `params[0]`). Bind-time string parameters
1648    /// are decoded into typed `Value`s by the pgwire layer before
1649    /// this call so the resulting AST hits the same execution
1650    /// path as a simple query — no SQL re-parse.
1651    ///
1652    /// Pgwire's `Execute` (E) message after a `Bind` (B) lands here.
1653    pub fn execute_prepared(
1654        &mut self,
1655        mut stmt: Statement,
1656        params: &[Value],
1657    ) -> Result<QueryResult, EngineError> {
1658        substitute_placeholders(&mut stmt, params)?;
1659        // v7.16.0 — set `current_tx` for the duration of the
1660        // dispatch so the `exec_*` helpers see the right TX
1661        // slot (matches what `execute_in_with_cancel` does for
1662        // simple-query). Pre-v7.16 the simple-query path
1663        // worked because every public entry point routed
1664        // through `execute_in_with_cancel`; the prepared path
1665        // skipped the wrap and so its INSERTs/UPDATEs landed
1666        // in the no-tx default slot, silently invisible to a
1667        // BEGIN/COMMIT-bracketed flow. Caught by spg-sqlx's
1668        // first transaction-visibility test.
1669        let saved = self.current_tx;
1670        self.current_tx = Some(IMPLICIT_TX);
1671        let result = self.execute_stmt_with_cancel(stmt, CancelToken::none());
1672        self.current_tx = saved;
1673        result
1674    }
1675
1676    fn execute_inner_with_cancel(
1677        &mut self,
1678        sql: &str,
1679        cancel: CancelToken<'_>,
1680    ) -> Result<QueryResult, EngineError> {
1681        cancel.check()?;
1682        let stmt = self.prepare(sql)?;
1683        // v6.5.1 — wrap the executor with a wall-clock window so we
1684        // can record into spg_stat_query. Skip when the engine has
1685        // no clock attached (no_std embedded callers).
1686        let start_us = self.clock.map(|f| f());
1687        let result = self.execute_stmt_with_cancel(stmt, cancel);
1688        if let (Some(t0), Ok(_)) = (start_us, &result) {
1689            let now = self.clock.map_or(t0, |f| f());
1690            let elapsed = now.saturating_sub(t0).max(0) as u64;
1691            self.query_stats.record(sql, elapsed, now as u64);
1692            // v6.5.6 — slow-query log: fire callback when elapsed
1693            // exceeds the configured floor.
1694            if let (Some(threshold), Some(logger)) =
1695                (self.slow_query_threshold_us, self.slow_query_logger)
1696                && elapsed >= threshold
1697            {
1698                logger(sql, elapsed);
1699            }
1700        }
1701        result
1702    }
1703
1704    fn execute_stmt_with_cancel(
1705        &mut self,
1706        stmt: Statement,
1707        cancel: CancelToken<'_>,
1708    ) -> Result<QueryResult, EngineError> {
1709        cancel.check()?;
1710        let result = match stmt {
1711            Statement::CreateTable(s) => self.exec_create_table(s),
1712            // v7.9.15 — CREATE EXTENSION is a no-op on SPG. Returns
1713            // CommandOk with affected=0; modified_catalog=false so
1714            // the WAL doesn't grow a useless entry. mailrs F3.
1715            Statement::CreateExtension(_) => Ok(QueryResult::CommandOk {
1716                affected: 0,
1717                modified_catalog: false,
1718            }),
1719            // v7.16.2 — DO $$ ... $$ block. mailrs round-10 A.2
1720            // — the pre-v7.9.27 no-op SILENTLY swallowed every
1721            // mailrs migrate-038/-040/-042 idempotent rename
1722            // (the IF EXISTS … THEN ALTER … END block never
1723            // ran). v7.16.2 dispatches to exec_do_block which
1724            // runs the PlPgSqlBlock at top level via the same
1725            // execute_stmts machinery the trigger executor
1726            // uses (NEW=None, OLD=None — DO blocks have no
1727            // row context).
1728            Statement::DoBlock(body) => self.exec_do_block(body),
1729            // v7.14.0 — empty-statement no-op for pg_dump /
1730            // mysqldump preamble lines that collapse to nothing
1731            // after comment-stripping.
1732            Statement::Empty => Ok(QueryResult::CommandOk {
1733                affected: 0,
1734                modified_catalog: false,
1735            }),
1736            Statement::DropTable { names, if_exists } => self.exec_drop_table(names, if_exists),
1737            Statement::DropIndex { name, if_exists } => self.exec_drop_index(name, if_exists),
1738            Statement::CreateIndex(s) => self.exec_create_index(s),
1739            Statement::Insert(s) => self.exec_insert(s),
1740            Statement::Update(s) => self.exec_update_cancel(&s, cancel),
1741            Statement::Delete(s) => self.exec_delete_cancel(&s, cancel),
1742            Statement::Select(s) => self.exec_select_cancel(&s, cancel),
1743            Statement::Begin => self.exec_begin(),
1744            Statement::Commit => self.exec_commit(),
1745            Statement::Rollback => self.exec_rollback(),
1746            Statement::Savepoint(name) => self.exec_savepoint(name),
1747            Statement::RollbackToSavepoint(name) => self.exec_rollback_to_savepoint(&name),
1748            Statement::ReleaseSavepoint(name) => self.exec_release_savepoint(&name),
1749            Statement::ShowTables => Ok(self.exec_show_tables()),
1750            Statement::ShowColumns(table) => self.exec_show_columns(&table),
1751            Statement::ShowUsers => Ok(self.exec_show_users()),
1752            Statement::ShowPublications => Ok(self.exec_show_publications()),
1753            Statement::ShowSubscriptions => Ok(self.exec_show_subscriptions()),
1754            Statement::CreateUser(s) => self.exec_create_user(&s),
1755            Statement::DropUser(name) => self.exec_drop_user(&name),
1756            Statement::Explain(e) => self.exec_explain(&e, cancel),
1757            Statement::AlterIndex(s) => self.exec_alter_index(s),
1758            Statement::AlterTable(s) => self.exec_alter_table(s),
1759            Statement::CreatePublication(s) => self.exec_create_publication(s),
1760            Statement::DropPublication(name) => self.exec_drop_publication(&name),
1761            Statement::CreateSubscription(s) => self.exec_create_subscription(s),
1762            Statement::DropSubscription(name) => self.exec_drop_subscription(&name),
1763            // v6.1.7 — WAIT FOR WAL POSITION needs `lag_state`,
1764            // which lives in spg-server's ServerState. The engine
1765            // surfaces a clear error; the server-layer dispatch
1766            // intercepts the SQL before it reaches the engine on
1767            // a server build, so this arm only fires for
1768            // engine-only callers (spg-embedded, lib tests).
1769            Statement::WaitForWalPosition { .. } => Err(EngineError::Unsupported(
1770                "WAIT FOR WAL POSITION must be handled by the server layer".into(),
1771            )),
1772            // v6.2.0 — ANALYZE recomputes per-column histograms.
1773            Statement::Analyze(target) => self.exec_analyze(target.as_deref()),
1774            // v6.7.3 — COMPACT COLD SEGMENTS.
1775            Statement::CompactColdSegments => self.exec_compact_cold_segments(),
1776            // v7.12.1 — SET / RESET session parameter. Engine
1777            // tracks the value in `session_params`; FTS dispatcher
1778            // reads `default_text_search_config`. Everything else
1779            // is a recorded no-op (PG dump compat).
1780            Statement::SetParameter { name, value } => {
1781                self.set_session_param(name, value);
1782                Ok(QueryResult::CommandOk {
1783                    affected: 0,
1784                    modified_catalog: false,
1785                })
1786            }
1787            // v7.14.0 — MySQL multi-assignment SET. Each pair runs
1788            // through `set_session_param` so engine-known params
1789            // (FOREIGN_KEY_CHECKS, session_replication_role, …) take
1790            // effect; unknown pairs (including `@VAR` LHS from the
1791            // mysqldump preamble) are recorded then ignored.
1792            Statement::SetParameterList(pairs) => {
1793                for (name, value) in pairs {
1794                    self.set_session_param(name, value);
1795                }
1796                Ok(QueryResult::CommandOk {
1797                    affected: 0,
1798                    modified_catalog: false,
1799                })
1800            }
1801            // v7.12.4 — CREATE FUNCTION / CREATE TRIGGER / DROP …
1802            // for the PL/pgSQL trigger surface. exec_* methods are
1803            // defined alongside the existing CREATE handlers below.
1804            Statement::CreateFunction(s) => self.exec_create_function(s),
1805            Statement::CreateTrigger(s) => self.exec_create_trigger(s),
1806            Statement::DropTrigger {
1807                name,
1808                table,
1809                if_exists,
1810            } => self.exec_drop_trigger(&name, &table, if_exists),
1811            Statement::DropFunction { name, if_exists } => {
1812                self.exec_drop_function(&name, if_exists)
1813            }
1814            Statement::ResetParameter(target) => {
1815                match target {
1816                    None => self.session_params.clear(),
1817                    Some(name) => {
1818                        self.session_params.remove(&name.to_ascii_lowercase());
1819                    }
1820                }
1821                Ok(QueryResult::CommandOk {
1822                    affected: 0,
1823                    modified_catalog: false,
1824                })
1825            }
1826        };
1827        self.enforce_row_limit(result)
1828    }
1829
1830    /// v6.1.2 — `CREATE PUBLICATION` runtime path. Duplicate names
1831    /// surface as `EngineError::Unsupported` so the existing PG-wire
1832    /// error mapping stays uniform; the message carries the name so
1833    /// operators can grep replication-log noise. Inside-transaction
1834    /// invocation is rejected (matches `CREATE USER` / `DROP USER`
1835    /// stance) — replication-catalog mutation is a connection-level
1836    /// administrative op, not a transactional one.
1837    fn exec_create_publication(
1838        &mut self,
1839        s: CreatePublicationStatement,
1840    ) -> Result<QueryResult, EngineError> {
1841        // v6.1.4 — the v6.1.2 "no DDL inside a transaction" guard
1842        // was over-cautious: it also blocked the auto-commit wrap
1843        // path (which begins an internal TX around every WAL-
1844        // logged statement). PG itself allows CREATE PUBLICATION
1845        // inside a transaction (it rolls back with the TX).
1846        self.publications
1847            .create(s.name, s.scope)
1848            .map_err(|e| EngineError::Unsupported(alloc::format!("CREATE PUBLICATION: {e:?}")))?;
1849        Ok(QueryResult::CommandOk {
1850            affected: 1,
1851            modified_catalog: true,
1852        })
1853    }
1854
1855    /// v6.1.2 — `DROP PUBLICATION` runtime path. PG-compatible silent
1856    /// no-op when the publication doesn't exist (returns `affected=0`
1857    /// in that case so the wire-level command tag distinguishes
1858    /// "dropped" from "no-op", though both succeed).
1859    fn exec_drop_publication(&mut self, name: &str) -> Result<QueryResult, EngineError> {
1860        let removed = self.publications.drop(name);
1861        Ok(QueryResult::CommandOk {
1862            affected: usize::from(removed),
1863            modified_catalog: removed,
1864        })
1865    }
1866
1867    /// v6.1.2 — read access to the publication catalog. Used by
1868    /// the v6.1.5 publisher-side WAL filter, by `SHOW PUBLICATIONS`
1869    /// (v6.1.3+), and by e2e tests that need to assert state without
1870    /// going through the wire.
1871    pub const fn publications(&self) -> &publications::Publications {
1872        &self.publications
1873    }
1874
1875    /// v6.1.4 — `CREATE SUBSCRIPTION` runtime path. Defaults
1876    /// `enabled = true` and `last_received_pos = 0` for a freshly-
1877    /// created subscription. The actual worker thread is spawned
1878    /// by spg-server once the engine returns success.
1879    fn exec_create_subscription(
1880        &mut self,
1881        s: CreateSubscriptionStatement,
1882    ) -> Result<QueryResult, EngineError> {
1883        // See exec_create_publication — the in_transaction gate
1884        // was over-cautious; the auto-commit wrap path holds an
1885        // internal TX that this check was incorrectly blocking.
1886        let sub = subscriptions::Subscription {
1887            conn_str: s.conn_str,
1888            publications: s.publications,
1889            enabled: true,
1890            last_received_pos: 0,
1891        };
1892        self.subscriptions
1893            .create(s.name, sub)
1894            .map_err(|e| EngineError::Unsupported(alloc::format!("CREATE SUBSCRIPTION: {e:?}")))?;
1895        Ok(QueryResult::CommandOk {
1896            affected: 1,
1897            modified_catalog: true,
1898        })
1899    }
1900
1901    /// v6.1.4 — `DROP SUBSCRIPTION`. Silent no-op when the name
1902    /// doesn't exist (PG-compatible). The associated worker is
1903    /// torn down by spg-server when it observes the catalog
1904    /// change at the next snapshot or via the engine's
1905    /// subscriptions accessor (the worker polls the catalog on
1906    /// reconnect; v6.1.5's filter-side will tighten this to an
1907    /// explicit signal).
1908    fn exec_drop_subscription(&mut self, name: &str) -> Result<QueryResult, EngineError> {
1909        let removed = self.subscriptions.drop(name);
1910        Ok(QueryResult::CommandOk {
1911            affected: usize::from(removed),
1912            modified_catalog: removed,
1913        })
1914    }
1915
1916    /// v6.1.4 — read access to the subscription catalog. Used by
1917    /// the subscription worker (read its own row to find its
1918    /// publications + last applied position), by SHOW SUBSCRIPTIONS,
1919    /// and by e2e tests asserting state directly.
1920    pub const fn subscriptions(&self) -> &subscriptions::Subscriptions {
1921        &self.subscriptions
1922    }
1923
1924    /// v6.1.4 — write access to `last_received_pos`. Worker
1925    /// calls this after each apply batch (under the engine's
1926    /// write-lock). Returns `false` when the subscription was
1927    /// dropped between when the worker received the record and
1928    /// when this call landed.
1929    pub fn subscription_advance(&mut self, name: &str, pos: u64) -> bool {
1930        self.subscriptions.update_last_received_pos(name, pos)
1931    }
1932
1933    /// v6.1.4 — `SHOW SUBSCRIPTIONS` row materialisation. Returns
1934    /// `(name, conn_str, publications, enabled, last_received_pos)`
1935    /// ordered by subscription name. The `publications` column is
1936    /// the comma-joined list ("p1, p2") for ergonomic SHOW output;
1937    /// callers wanting structured access read `Engine::subscriptions`.
1938    fn exec_show_subscriptions(&self) -> QueryResult {
1939        let columns = alloc::vec![
1940            ColumnSchema::new("name", DataType::Text, false),
1941            ColumnSchema::new("conn_str", DataType::Text, false),
1942            ColumnSchema::new("publications", DataType::Text, false),
1943            ColumnSchema::new("enabled", DataType::Bool, false),
1944            ColumnSchema::new("last_received_pos", DataType::BigInt, false),
1945        ];
1946        let rows: Vec<Row> = self
1947            .subscriptions
1948            .iter()
1949            .map(|(name, sub)| {
1950                Row::new(alloc::vec![
1951                    Value::Text(name.clone()),
1952                    Value::Text(sub.conn_str.clone()),
1953                    Value::Text(sub.publications.join(", ")),
1954                    Value::Bool(sub.enabled),
1955                    Value::BigInt(i64::try_from(sub.last_received_pos).unwrap_or(i64::MAX)),
1956                ])
1957            })
1958            .collect();
1959        QueryResult::Rows { columns, rows }
1960    }
1961
1962    /// v6.2.0 — materialise `spg_statistic` rows. One row per
1963    /// `(table, column)` pair tracked in `Statistics`, with
1964    /// `histogram_bounds` rendered as a `[v0, v1, ...]` string —
1965    /// the same canonical form vector literals use for round-trip.
1966    fn exec_spg_statistic(&self) -> QueryResult {
1967        let columns = alloc::vec![
1968            ColumnSchema::new("table_name", DataType::Text, false),
1969            ColumnSchema::new("column_name", DataType::Text, false),
1970            ColumnSchema::new("null_frac", DataType::Float, false),
1971            ColumnSchema::new("n_distinct", DataType::BigInt, false),
1972            ColumnSchema::new("histogram_bounds", DataType::Text, false),
1973            // v6.7.0 — appended column (v6.2.0 stability contract
1974            // allows APPEND to spg_statistic, not reorder/rename).
1975            // Reports the cached per-table cold-row count; same
1976            // value across every column row of the same table.
1977            ColumnSchema::new("cold_row_count", DataType::BigInt, false),
1978        ];
1979        let rows: Vec<Row> = self
1980            .statistics
1981            .iter()
1982            .map(|((t, c), s)| {
1983                let cold = self
1984                    .catalog
1985                    .get(t)
1986                    .map_or(0, |table| table.cold_row_count());
1987                Row::new(alloc::vec![
1988                    Value::Text(t.clone()),
1989                    Value::Text(c.clone()),
1990                    Value::Float(f64::from(s.null_frac)),
1991                    Value::BigInt(i64::try_from(s.n_distinct).unwrap_or(i64::MAX)),
1992                    Value::Text(render_histogram_bounds(&s.histogram_bounds)),
1993                    Value::BigInt(i64::try_from(cold).unwrap_or(i64::MAX)),
1994                ])
1995            })
1996            .collect();
1997        QueryResult::Rows { columns, rows }
1998    }
1999
2000    /// v6.5.0 — materialise `spg_stat_replication` rows. One row
2001    /// per subscription with `(name, conn_str, publications,
2002    /// last_received_pos, enabled)`. Surface mirrors
2003    /// `SHOW SUBSCRIPTIONS` but follows the virtual-table dispatch
2004    /// shape so it composes with SELECT clauses (WHERE, projection
2005    /// onto specific columns, etc).
2006    fn exec_spg_stat_replication(&self) -> QueryResult {
2007        let columns = alloc::vec![
2008            ColumnSchema::new("name", DataType::Text, false),
2009            ColumnSchema::new("conn_str", DataType::Text, false),
2010            ColumnSchema::new("publications", DataType::Text, false),
2011            ColumnSchema::new("last_received_pos", DataType::BigInt, false),
2012            ColumnSchema::new("enabled", DataType::Bool, false),
2013        ];
2014        let rows: Vec<Row> = self
2015            .subscriptions
2016            .iter()
2017            .map(|(name, sub)| {
2018                Row::new(alloc::vec![
2019                    Value::Text(name.clone()),
2020                    Value::Text(sub.conn_str.clone()),
2021                    Value::Text(sub.publications.join(",")),
2022                    Value::BigInt(i64::try_from(sub.last_received_pos).unwrap_or(i64::MAX)),
2023                    Value::Bool(sub.enabled),
2024                ])
2025            })
2026            .collect();
2027        QueryResult::Rows { columns, rows }
2028    }
2029
2030    /// v6.5.0 — materialise `spg_stat_segment` rows. One row per
2031    /// cold-tier segment with `(segment_id, num_rows, num_pages,
2032    /// total_bytes)`.
2033    ///
2034    /// v6.7.0 — appended `table_name` column resolves the v6.5.0
2035    /// carve-out. Walks every user table's BTree indices to find
2036    /// which table's Cold locators point at each segment. Empty
2037    /// string for orphan segments (loaded via SPG_PRELOAD_COLD_SEGMENT
2038    /// before any index registered a locator). The walk is
2039    /// O(tables × indices × keys); cached per call, not across
2040    /// calls — re-walked on every `SELECT * FROM spg_stat_segment`.
2041    fn exec_spg_stat_segment(&self) -> QueryResult {
2042        let columns = alloc::vec![
2043            ColumnSchema::new("segment_id", DataType::BigInt, false),
2044            ColumnSchema::new("table_name", DataType::Text, false),
2045            ColumnSchema::new("num_rows", DataType::BigInt, false),
2046            ColumnSchema::new("num_pages", DataType::BigInt, false),
2047            ColumnSchema::new("total_bytes", DataType::BigInt, false),
2048        ];
2049        // v6.7.0 — build a segment_id → table_name map by walking
2050        // every user table's BTree indices once. O(tables × indices
2051        // × keys) for the v6.5.0 carve-out resolution; acceptable
2052        // because spg_stat_segment is operator-facing (not on a
2053        // hot-loop path).
2054        let mut segment_owners: alloc::collections::BTreeMap<u32, String> = BTreeMap::new();
2055        for tname in self.catalog.table_names() {
2056            if is_internal_table_name(&tname) {
2057                continue;
2058            }
2059            let Some(t) = self.catalog.get(&tname) else {
2060                continue;
2061            };
2062            for idx in t.indices() {
2063                if let spg_storage::IndexKind::BTree(map) = &idx.kind {
2064                    for (_, locs) in map.iter() {
2065                        for loc in locs {
2066                            if let spg_storage::RowLocator::Cold { segment_id, .. } = loc {
2067                                segment_owners
2068                                    .entry(*segment_id)
2069                                    .or_insert_with(|| tname.clone());
2070                            }
2071                        }
2072                    }
2073                }
2074            }
2075        }
2076        let rows: Vec<Row> = self
2077            .catalog
2078            .cold_segment_ids_global()
2079            .iter()
2080            .filter_map(|&id| {
2081                let seg = self.catalog.cold_segment(id)?;
2082                let meta = seg.meta();
2083                let owner = segment_owners.get(&id).cloned().unwrap_or_default();
2084                Some(Row::new(alloc::vec![
2085                    Value::BigInt(i64::from(id)),
2086                    Value::Text(owner),
2087                    Value::BigInt(i64::try_from(meta.num_rows).unwrap_or(i64::MAX)),
2088                    Value::BigInt(i64::from(meta.num_pages)),
2089                    Value::BigInt(i64::try_from(meta.total_bytes).unwrap_or(i64::MAX)),
2090                ]))
2091            })
2092            .collect();
2093        QueryResult::Rows { columns, rows }
2094    }
2095
2096    /// v6.5.1 — materialise `spg_stat_query` rows. One row per
2097    /// distinct SQL text recorded since the engine booted, capped
2098    /// at `QUERY_STATS_MAX` (1024). Columns:
2099    ///   sql, exec_count, total_us, mean_us, max_us, last_seen_us
2100    /// mean_us = total_us / exec_count (saturating).
2101    fn exec_spg_stat_query(&self) -> QueryResult {
2102        let columns = alloc::vec![
2103            ColumnSchema::new("sql", DataType::Text, false),
2104            ColumnSchema::new("exec_count", DataType::BigInt, false),
2105            ColumnSchema::new("total_us", DataType::BigInt, false),
2106            ColumnSchema::new("mean_us", DataType::BigInt, false),
2107            ColumnSchema::new("max_us", DataType::BigInt, false),
2108            ColumnSchema::new("last_seen_us", DataType::BigInt, false),
2109        ];
2110        let rows: Vec<Row> = self
2111            .query_stats
2112            .snapshot()
2113            .into_iter()
2114            .map(|(sql, s)| {
2115                let mean = if s.exec_count == 0 {
2116                    0
2117                } else {
2118                    s.total_us / s.exec_count
2119                };
2120                Row::new(alloc::vec![
2121                    Value::Text(sql),
2122                    Value::BigInt(i64::try_from(s.exec_count).unwrap_or(i64::MAX)),
2123                    Value::BigInt(i64::try_from(s.total_us).unwrap_or(i64::MAX)),
2124                    Value::BigInt(i64::try_from(mean).unwrap_or(i64::MAX)),
2125                    Value::BigInt(i64::try_from(s.max_us).unwrap_or(i64::MAX)),
2126                    Value::BigInt(i64::try_from(s.last_seen_us).unwrap_or(i64::MAX)),
2127                ])
2128            })
2129            .collect();
2130        QueryResult::Rows { columns, rows }
2131    }
2132
2133    /// v6.5.2 — register a connection-state provider. spg-server
2134    /// calls this at startup with a function that snapshots its
2135    /// per-pgwire-connection registry. Engine reads through the
2136    /// callback on `SELECT * FROM spg_stat_activity`.
2137    #[must_use]
2138    pub const fn with_activity_provider(mut self, f: ActivityProvider) -> Self {
2139        self.activity_provider = Some(f);
2140        self
2141    }
2142
2143    /// v6.5.3 — register audit chain provider + verifier.
2144    #[must_use]
2145    pub const fn with_audit_providers(
2146        mut self,
2147        chain: AuditChainProvider,
2148        verify: AuditVerifier,
2149    ) -> Self {
2150        self.audit_chain_provider = Some(chain);
2151        self.audit_verifier = Some(verify);
2152        self
2153    }
2154
2155    /// v6.5.6 — register a slow-query log callback. `threshold_us`
2156    /// is the floor (in microseconds); only executes above the floor
2157    /// fire the callback. spg-server wires this from
2158    /// `SPG_SLOW_QUERY_THRESHOLD_MS` (default 100 ms).
2159    #[must_use]
2160    pub const fn with_slow_query_log(mut self, threshold_us: u64, logger: SlowQueryLogger) -> Self {
2161        self.slow_query_threshold_us = Some(threshold_us);
2162        self.slow_query_logger = Some(logger);
2163        self
2164    }
2165
2166    /// v6.5.6 — operator knob for plan cache cap. spg-server reads
2167    /// `SPG_PLAN_CACHE_MAX` env at startup; uses this to override
2168    /// the compile-time default of 256.
2169    pub fn set_plan_cache_max(&mut self, n: usize) {
2170        self.plan_cache.set_max_entries(n);
2171    }
2172
2173    /// v6.5.2 — materialise `spg_stat_activity` rows. Pulls a fresh
2174    /// snapshot from the registered `ActivityProvider`. Returns an
2175    /// empty result set when no provider is registered (the no_std
2176    /// embedded path with no pgwire layer).
2177    fn exec_spg_stat_activity(&self) -> QueryResult {
2178        let columns = alloc::vec![
2179            ColumnSchema::new("pid", DataType::Int, false),
2180            ColumnSchema::new("user", DataType::Text, false),
2181            ColumnSchema::new("started_at_us", DataType::BigInt, false),
2182            ColumnSchema::new("current_sql", DataType::Text, false),
2183            ColumnSchema::new("wait_event", DataType::Text, false),
2184            ColumnSchema::new("elapsed_us", DataType::BigInt, false),
2185            ColumnSchema::new("in_transaction", DataType::Bool, false),
2186        ];
2187        let rows: Vec<Row> = self
2188            .activity_provider
2189            .map(|f| f())
2190            .unwrap_or_default()
2191            .into_iter()
2192            .map(|r| {
2193                Row::new(alloc::vec![
2194                    Value::Int(i32::try_from(r.pid).unwrap_or(i32::MAX)),
2195                    Value::Text(r.user),
2196                    Value::BigInt(r.started_at_us),
2197                    Value::Text(r.current_sql),
2198                    Value::Text(r.wait_event),
2199                    Value::BigInt(r.elapsed_us),
2200                    Value::Bool(r.in_transaction),
2201                ])
2202            })
2203            .collect();
2204        QueryResult::Rows { columns, rows }
2205    }
2206
2207    /// v6.5.4 — materialise `spg_table_ddl` rows. One row per user
2208    /// table with `(table_name, ddl)`. Reconstructed from catalog
2209    /// state on demand.
2210    fn exec_spg_table_ddl(&self) -> QueryResult {
2211        let columns = alloc::vec![
2212            ColumnSchema::new("table_name", DataType::Text, false),
2213            ColumnSchema::new("ddl", DataType::Text, false),
2214        ];
2215        let rows: Vec<Row> = self
2216            .catalog
2217            .table_names()
2218            .into_iter()
2219            .filter(|n| !is_internal_table_name(n))
2220            .filter_map(|name| {
2221                let table = self.catalog.get(&name)?;
2222                let ddl = render_create_table(&name, &table.schema().columns);
2223                Some(Row::new(alloc::vec![Value::Text(name), Value::Text(ddl),]))
2224            })
2225            .collect();
2226        QueryResult::Rows { columns, rows }
2227    }
2228
2229    /// v6.5.4 — materialise `spg_role_ddl` rows. One row per user
2230    /// with `(role_name, ddl)`. Password is redacted (matches the
2231    /// `Statement::CreateUser` Display which prints `'<redacted>'`).
2232    fn exec_spg_role_ddl(&self) -> QueryResult {
2233        let columns = alloc::vec![
2234            ColumnSchema::new("role_name", DataType::Text, false),
2235            ColumnSchema::new("ddl", DataType::Text, false),
2236        ];
2237        let rows: Vec<Row> = self
2238            .users
2239            .iter()
2240            .map(|(name, rec)| {
2241                let ddl = alloc::format!(
2242                    "CREATE USER {name} WITH PASSWORD '<redacted>' ROLE '{}'",
2243                    rec.role.as_str(),
2244                );
2245                Row::new(alloc::vec![
2246                    Value::Text(String::from(name)),
2247                    Value::Text(ddl)
2248                ])
2249            })
2250            .collect();
2251        QueryResult::Rows { columns, rows }
2252    }
2253
2254    /// v6.5.4 — materialise `spg_database_ddl`: single row whose
2255    /// `ddl` column concatenates every user table's CREATE +
2256    /// every role's CREATE in deterministic catalog order. Suitable
2257    /// for piping back through `Engine::execute` to recreate a
2258    /// schema-equivalent database.
2259    fn exec_spg_database_ddl(&self) -> QueryResult {
2260        let columns = alloc::vec![ColumnSchema::new("ddl", DataType::Text, false)];
2261        let mut out = String::new();
2262        for (name, rec) in self.users.iter() {
2263            out.push_str(&alloc::format!(
2264                "CREATE USER {name} WITH PASSWORD '<redacted>' ROLE '{}';\n",
2265                rec.role.as_str(),
2266            ));
2267        }
2268        for name in self.catalog.table_names() {
2269            if is_internal_table_name(&name) {
2270                continue;
2271            }
2272            if let Some(table) = self.catalog.get(&name) {
2273                out.push_str(&render_create_table(&name, &table.schema().columns));
2274                out.push_str(";\n");
2275            }
2276        }
2277        QueryResult::Rows {
2278            columns,
2279            rows: alloc::vec![Row::new(alloc::vec![Value::Text(out)])],
2280        }
2281    }
2282
2283    /// v6.5.3 — materialise `spg_audit_chain` rows. Pulls a fresh
2284    /// snapshot from the registered provider; empty when no
2285    /// provider is set.
2286    fn exec_spg_audit_chain(&self) -> QueryResult {
2287        let columns = alloc::vec![
2288            ColumnSchema::new("seq", DataType::BigInt, false),
2289            ColumnSchema::new("ts_ms", DataType::BigInt, false),
2290            ColumnSchema::new("prev_hash", DataType::Text, false),
2291            ColumnSchema::new("entry_hash", DataType::Text, false),
2292            ColumnSchema::new("sql", DataType::Text, false),
2293        ];
2294        let rows: Vec<Row> = self
2295            .audit_chain_provider
2296            .map(|f| f())
2297            .unwrap_or_default()
2298            .into_iter()
2299            .map(|r| {
2300                Row::new(alloc::vec![
2301                    Value::BigInt(r.seq),
2302                    Value::BigInt(r.ts_ms),
2303                    Value::Text(r.prev_hash_hex),
2304                    Value::Text(r.entry_hash_hex),
2305                    Value::Text(r.sql),
2306                ])
2307            })
2308            .collect();
2309        QueryResult::Rows { columns, rows }
2310    }
2311
2312    /// v6.5.3 — materialise `spg_audit_verify` single-row result.
2313    /// `(verified_count, broken_at_seq)` — broken_at_seq is `-1`
2314    /// on a clean chain. Returns one row with both values 0 when
2315    /// no verifier is registered (no-data fallback for embedded
2316    /// callers).
2317    fn exec_spg_audit_verify(&self) -> QueryResult {
2318        let columns = alloc::vec![
2319            ColumnSchema::new("verified_count", DataType::BigInt, false),
2320            ColumnSchema::new("broken_at_seq", DataType::BigInt, false),
2321        ];
2322        let (verified, broken) = self.audit_verifier.map(|f| f()).unwrap_or((0, -1));
2323        let row = Row::new(alloc::vec![Value::BigInt(verified), Value::BigInt(broken),]);
2324        QueryResult::Rows {
2325            columns,
2326            rows: alloc::vec![row],
2327        }
2328    }
2329
2330    /// v6.5.1 — read-only accessor for tests + v6.5.6 ops resets.
2331    pub fn query_stats(&self) -> &query_stats::QueryStats {
2332        &self.query_stats
2333    }
2334
2335    /// v6.5.1 — mutable accessor (clear, etc).
2336    pub fn query_stats_mut(&mut self) -> &mut query_stats::QueryStats {
2337        &mut self.query_stats
2338    }
2339
2340    /// v6.2.0 — read access to the per-column statistics table.
2341    /// Used by the planner (v6.2.2 selectivity functions read this),
2342    /// by `SELECT * FROM spg_statistic`, and by e2e tests.
2343    pub const fn statistics(&self) -> &statistics::Statistics {
2344        &self.statistics
2345    }
2346
2347    /// v6.2.1 — return tables whose modified-row count crossed the
2348    /// auto-analyze threshold since the last ANALYZE on that table.
2349    /// The threshold is `0.1 × max(row_count, MIN_ROWS_FOR_AUTO_
2350    /// ANALYZE)` — combines PG-style fractional + absolute lower
2351    /// bound so a fresh / tiny table doesn't get hammered on every
2352    /// INSERT.
2353    ///
2354    /// Designed to be cheap: walks every user table's
2355    /// `Catalog::table_names()` + reads `statistics::modified_
2356    /// since_last_analyze()` (BTreeMap lookup). The background
2357    /// worker calls this under `engine.read()` then drops the lock
2358    /// before re-acquiring `engine.write()` for the actual ANALYZE.
2359    pub fn tables_needing_analyze(&self) -> Vec<String> {
2360        const MIN_ROWS: u64 = 100;
2361        let mut out = Vec::new();
2362        for name in self.catalog.table_names() {
2363            if is_internal_table_name(&name) {
2364                continue;
2365            }
2366            let Some(table) = self.catalog.get(&name) else {
2367                continue;
2368            };
2369            let row_count = table.rows().len() as u64;
2370            let modified = self.statistics.modified_since_last_analyze(&name);
2371            // Threshold: ceil(0.1 × max(row_count, MIN_ROWS)),
2372            // computed in integer arithmetic so spg-engine stays
2373            // no_std without pulling in libm. `(n + 9) / 10` is
2374            // `ceil(n / 10)` for non-negative `n`.
2375            let base = row_count.max(MIN_ROWS);
2376            let threshold = base.saturating_add(9) / 10;
2377            if modified >= threshold {
2378                out.push(name);
2379            }
2380        }
2381        out
2382    }
2383
2384    /// v6.2.0 — `ANALYZE [<table>]` runtime. Bare `ANALYZE` walks
2385    /// every user table; `ANALYZE <name>` re-stats one. For each
2386    /// target table, single-pass scan + per-column histogram +
2387    /// `null_frac` + `n_distinct`. Replaces the table's prior
2388    /// stats; resets the modified-row counter.
2389    ///
2390    /// v6.2.0 doesn't sample — it scans the full table. v6.2.x
2391    /// can add reservoir sampling at the > 100 K-row mark; not a
2392    /// scope blocker for the current commit since rows ≤ 100 K
2393    /// analyse in milliseconds.
2394    fn exec_analyze(&mut self, target: Option<&str>) -> Result<QueryResult, EngineError> {
2395        let names: Vec<String> = if let Some(name) = target {
2396            // Verify the table exists; surface a clear error if not.
2397            if self.catalog.get(name).is_none() {
2398                return Err(EngineError::Storage(StorageError::TableNotFound {
2399                    name: name.to_string(),
2400                }));
2401            }
2402            alloc::vec![name.to_string()]
2403        } else {
2404            self.catalog
2405                .table_names()
2406                .into_iter()
2407                .filter(|n| !is_internal_table_name(n))
2408                .collect()
2409        };
2410        let mut analysed = 0usize;
2411        for table_name in &names {
2412            self.analyze_one_table(table_name)?;
2413            analysed += 1;
2414        }
2415        // v6.3.1 — plan cache invalidation. Bump stats version so
2416        // future lookups see the new generation, and selectively
2417        // evict every plan whose `source_tables` overlap with the
2418        // ANALYZE target set. Bare ANALYZE (all tables) clears the
2419        // whole cache.
2420        if analysed > 0 {
2421            self.statistics.bump_version();
2422            if target.is_some() {
2423                for t in &names {
2424                    self.plan_cache.evict_referencing(t);
2425                }
2426            } else {
2427                self.plan_cache.clear();
2428            }
2429        }
2430        Ok(QueryResult::CommandOk {
2431            affected: analysed,
2432            modified_catalog: true,
2433        })
2434    }
2435
    // v6.7.3 — `COMPACT COLD SEGMENTS` runtime path. This note
    // describes `exec_compact_cold_segments`, not the item directly
    // below it. That path drives the engine-layer compaction shim
    // with the default 4 MiB segment-size threshold. spg-server
    // intercepts the SQL before it reaches the engine on a server
    // build — it reads `SPG_COMPACTION_TARGET_SEGMENT_BYTES`, calls
    // `Engine::compact_cold_segments_with_target` directly with the
    // env value, and persists every merged segment to
    // `<db>.spg/segments/`.
2443    /// v7.12.1 — record a `SET <name> = <value>` parameter. Names
2444    /// are case-folded to lowercase to match PG; values keep their
2445    /// caller-supplied form so observability paths see what was
2446    /// requested. Only `default_text_search_config` is consulted by
2447    /// the engine today.
2448    fn set_session_param(&mut self, name: String, value: spg_sql::ast::SetValue) {
2449        let normalised = match value {
2450            spg_sql::ast::SetValue::String(s) => s,
2451            spg_sql::ast::SetValue::Ident(s) => s,
2452            spg_sql::ast::SetValue::Number(s) => s,
2453            spg_sql::ast::SetValue::Default => String::new(),
2454        };
2455        let key = name.to_ascii_lowercase();
2456        // v7.14.0 — mysqldump preamble emits
2457        // `SET FOREIGN_KEY_CHECKS=0` so it can CREATE TABLE in any
2458        // order despite cross-table FK references; the closing
2459        // section emits `SET FOREIGN_KEY_CHECKS=1` (or
2460        // `=@OLD_FOREIGN_KEY_CHECKS` which resolves to "ON" in our
2461        // session-variable-aware path). Match both shapes.
2462        // Also accept PG's `session_replication_role = 'replica'`
2463        // which suppresses trigger + FK enforcement during a
2464        // logical replication apply (pg_dump preserves this for
2465        // schema-only mode but it shows up in some restores).
2466        let value_off = matches!(
2467            normalised.to_ascii_lowercase().as_str(),
2468            "0" | "off" | "false"
2469        );
2470        let value_on = matches!(
2471            normalised.to_ascii_lowercase().as_str(),
2472            "1" | "on" | "true"
2473        );
2474        if key == "foreign_key_checks"
2475            || key == "session_replication_role" && normalised.eq_ignore_ascii_case("replica")
2476        {
2477            if value_off || key == "session_replication_role" {
2478                self.foreign_key_checks = false;
2479            } else if value_on
2480                || (key == "session_replication_role" && normalised.eq_ignore_ascii_case("origin"))
2481            {
2482                self.foreign_key_checks = true;
2483                // Drain pending FK queue against the now-complete
2484                // catalog. Errors here surface as the SET reply —
2485                // caller knows enabling checks revealed orphans.
2486                let _ = self.drain_pending_foreign_keys();
2487            }
2488        }
2489        self.session_params.insert(key, normalised);
2490    }
2491
2492    /// v7.14.0 — resolve every queued FK whose installation was
2493    /// deferred (`SET FOREIGN_KEY_CHECKS=0` window). Called by
2494    /// `set_session_param` when checks flip back on and by the
2495    /// drop-import release gate. Each FK is resolved against the
2496    /// current catalog; remaining missing-parent errors propagate
2497    /// up so the caller knows the import was incomplete.
2498    fn drain_pending_foreign_keys(&mut self) -> Result<(), EngineError> {
2499        let pending = core::mem::take(&mut self.pending_foreign_keys);
2500        for (child, fk) in pending {
2501            // Resolve against the current catalog. Skip silently
2502            // when the child table itself was dropped between
2503            // queue + drain.
2504            let cols_snapshot = match self.active_catalog().get(&child) {
2505                Some(t) => t.schema().columns.clone(),
2506                None => continue,
2507            };
2508            let storage_fk =
2509                resolve_foreign_key(&child, &cols_snapshot, fk, self.active_catalog())?;
2510            let table = self
2511                .active_catalog_mut()
2512                .get_mut(&child)
2513                .expect("checked above");
2514            table.schema_mut().foreign_keys.push(storage_fk);
2515        }
2516        Ok(())
2517    }
2518
2519    /// v7.12.1 — read a session parameter set via `SET`. Used by
2520    /// the FTS function dispatcher to resolve the default config
2521    /// for `to_tsvector(text)` / `plainto_tsquery(text)` etc.
2522    #[must_use]
2523    pub fn session_param(&self, name: &str) -> Option<&str> {
2524        self.session_params
2525            .get(&name.to_ascii_lowercase())
2526            .map(String::as_str)
2527    }
2528
2529    /// v7.12.1 — build an `EvalContext` chained with the session's
2530    /// `default_text_search_config`. Engine-internal callers use
2531    /// this instead of `EvalContext::new` so the FTS function
2532    /// dispatcher sees the SET configuration.
2533    fn ev_ctx<'a>(
2534        &'a self,
2535        columns: &'a [ColumnSchema],
2536        alias: Option<&'a str>,
2537    ) -> EvalContext<'a> {
2538        EvalContext::new(columns, alias)
2539            .with_default_text_search_config(self.session_param("default_text_search_config"))
2540    }
2541
2542    /// `<db>.spg/segments/`. This arm only fires for engine-only
2543    /// callers (spg-embedded, lib tests); in that mode merged
2544    /// segments live in memory and are dropped at process exit.
2545    fn exec_compact_cold_segments(&mut self) -> Result<QueryResult, EngineError> {
2546        let target = COMPACTION_TARGET_DEFAULT_BYTES;
2547        let reports = self.compact_cold_segments_with_target(target)?;
2548        let columns = alloc::vec![
2549            ColumnSchema::new("table_name", DataType::Text, false),
2550            ColumnSchema::new("index_name", DataType::Text, false),
2551            ColumnSchema::new("sources_merged", DataType::BigInt, false),
2552            ColumnSchema::new("merged_segment_id", DataType::BigInt, false),
2553            ColumnSchema::new("merged_rows", DataType::BigInt, false),
2554            ColumnSchema::new("deleted_rows_pruned", DataType::BigInt, false),
2555            ColumnSchema::new("bytes_reclaimed_estimate", DataType::BigInt, false),
2556        ];
2557        let rows: Vec<Row> = reports
2558            .into_iter()
2559            .map(|(tname, iname, report)| {
2560                Row::new(alloc::vec![
2561                    Value::Text(tname),
2562                    Value::Text(iname),
2563                    Value::BigInt(i64::try_from(report.sources.len()).unwrap_or(i64::MAX)),
2564                    Value::BigInt(i64::from(report.merged_segment_id.unwrap_or(0))),
2565                    Value::BigInt(i64::try_from(report.merged_rows).unwrap_or(i64::MAX)),
2566                    Value::BigInt(i64::try_from(report.deleted_rows_pruned).unwrap_or(i64::MAX),),
2567                    Value::BigInt(
2568                        i64::try_from(report.bytes_reclaimed_estimate).unwrap_or(i64::MAX),
2569                    ),
2570                ])
2571            })
2572            .collect();
2573        Ok(QueryResult::Rows { columns, rows })
2574    }
2575
2576    /// Walk a single table's rows once and (re-)populate per-column
2577    /// stats. Drops the existing stats for `table` first so columns
2578    /// that have been DROP-ed between ANALYZEs don't leave stale
2579    /// rows.
2580    fn analyze_one_table(&mut self, table_name: &str) -> Result<(), EngineError> {
2581        let table = self.catalog.get(table_name).ok_or_else(|| {
2582            EngineError::Storage(StorageError::TableNotFound {
2583                name: table_name.to_string(),
2584            })
2585        })?;
2586        let schema = table.schema().clone();
2587        let row_count = table.rows().len();
2588        // For each column, collect (sorted) non-NULL textual values
2589        // + count NULLs; then ask `statistics::build_histogram` to
2590        // produce the 101 bounds and `estimate_n_distinct` the
2591        // distinct count.
2592        self.statistics.clear_table(table_name);
2593        for (col_pos, col_schema) in schema.columns.iter().enumerate() {
2594            // v6.2.0 skip: vector columns have their own stats
2595            // shape (HNSW graph topology). v6.2 deliberation #1.
2596            if matches!(col_schema.ty, DataType::Vector { .. }) {
2597                continue;
2598            }
2599            let mut non_null_values: Vec<Value> = Vec::with_capacity(row_count);
2600            let mut nulls: u64 = 0;
2601            for row in table.rows() {
2602                match row.values.get(col_pos) {
2603                    Some(Value::Null) | None => nulls += 1,
2604                    Some(v) => non_null_values.push(v.clone()),
2605                }
2606            }
2607            // Sort by type-aware ordering (Int as int, Text as
2608            // lex, etc.) so histogram bounds reflect the column's
2609            // natural order — not lexicographic on the string
2610            // representation, which would put "9" after "49".
2611            non_null_values.sort_by(|a, b| sort_values_for_histogram(a, b));
2612            let non_null: Vec<String> = non_null_values.iter().map(canonical_value_repr).collect();
2613            let null_frac = if row_count == 0 {
2614                0.0
2615            } else {
2616                #[allow(clippy::cast_precision_loss)]
2617                let f = nulls as f32 / row_count as f32;
2618                f
2619            };
2620            let n_distinct = statistics::estimate_n_distinct(&non_null);
2621            let histogram_bounds = statistics::build_histogram(&non_null);
2622            self.statistics.set(
2623                table_name.to_string(),
2624                col_schema.name.clone(),
2625                statistics::ColumnStats {
2626                    null_frac,
2627                    n_distinct,
2628                    histogram_bounds,
2629                },
2630            );
2631        }
2632        self.statistics.reset_modified(table_name);
2633        // v6.7.0 — refresh the per-table cold_rows cache. Walk the
2634        // BTree indices and count Cold locators (MAX across
2635        // indices); store the result on the table. Surfaced via
2636        // `spg_statistic.cold_row_count` (new column) and
2637        // `spg_stat_segment.table_name` (new column).
2638        let cold_count = {
2639            let table = self
2640                .active_catalog()
2641                .get(table_name)
2642                .expect("table still present");
2643            table.count_cold_locators()
2644        };
2645        let table_mut = self
2646            .active_catalog_mut()
2647            .get_mut(table_name)
2648            .expect("table still present");
2649        table_mut.set_cold_row_count(cold_count);
2650        Ok(())
2651    }
2652
2653    /// v6.1.3 — `SHOW PUBLICATIONS` row materialisation. Returns
2654    /// `(name, scope, table_count)` ordered by publication name.
2655    ///   - `scope` is the human-readable string:
2656    ///       `"FOR ALL TABLES"` /
2657    ///       `"FOR TABLE t1, t2"` /
2658    ///       `"FOR ALL TABLES EXCEPT t1, t2"`.
2659    ///   - `table_count` is NULL for `AllTables`, the list length
2660    ///     otherwise. NULLability lets clients distinguish "publish
2661    ///     everything" from "publish exactly 0 tables" (the v6.1.3
2662    ///     parser forbids the empty list, but the column shape is
2663    ///     ready for the v6.1.5 publisher-side semantics).
2664    fn exec_show_publications(&self) -> QueryResult {
2665        let columns = alloc::vec![
2666            ColumnSchema::new("name", DataType::Text, false),
2667            ColumnSchema::new("scope", DataType::Text, false),
2668            ColumnSchema::new("table_count", DataType::Int, true),
2669        ];
2670        let rows: Vec<Row> = self
2671            .publications
2672            .iter()
2673            .map(|(name, scope)| {
2674                let (scope_str, count_val) = match scope {
2675                    spg_sql::ast::PublicationScope::AllTables => {
2676                        ("FOR ALL TABLES".to_string(), Value::Null)
2677                    }
2678                    spg_sql::ast::PublicationScope::ForTables(ts) => (
2679                        alloc::format!("FOR TABLE {}", ts.join(", ")),
2680                        Value::Int(i32::try_from(ts.len()).unwrap_or(i32::MAX)),
2681                    ),
2682                    spg_sql::ast::PublicationScope::AllTablesExcept(ts) => (
2683                        alloc::format!("FOR ALL TABLES EXCEPT {}", ts.join(", ")),
2684                        Value::Int(i32::try_from(ts.len()).unwrap_or(i32::MAX)),
2685                    ),
2686                };
2687                Row::new(alloc::vec![
2688                    Value::Text(name.clone()),
2689                    Value::Text(scope_str),
2690                    count_val,
2691                ])
2692            })
2693            .collect();
2694        QueryResult::Rows { columns, rows }
2695    }
2696
2697    /// v4.1 `SHOW USERS` — `(name, role)` per row, ordered by name.
2698    fn exec_show_users(&self) -> QueryResult {
2699        let columns = alloc::vec![
2700            ColumnSchema::new("name", DataType::Text, false),
2701            ColumnSchema::new("role", DataType::Text, false),
2702        ];
2703        let rows: Vec<Row> = self
2704            .users
2705            .iter()
2706            .map(|(name, rec)| {
2707                Row::new(alloc::vec![
2708                    Value::Text(name.to_string()),
2709                    Value::Text(rec.role.as_str().to_string()),
2710                ])
2711            })
2712            .collect();
2713        QueryResult::Rows { columns, rows }
2714    }
2715
2716    fn exec_create_user(&mut self, s: &CreateUserStatement) -> Result<QueryResult, EngineError> {
2717        if self.in_transaction() {
2718            return Err(EngineError::Unsupported(
2719                "CREATE USER is not allowed inside a transaction".into(),
2720            ));
2721        }
2722        let role = users::Role::parse(&s.role).ok_or_else(|| {
2723            EngineError::Unsupported(alloc::format!("invalid role: {:?}", s.role))
2724        })?;
2725        // Prefer the host-injected RNG. Falls back to a deterministic
2726        // salt derived from the username only when no RNG is wired —
2727        // acceptable for tests; the server always installs one.
2728        let salt = self.salt_fn.map_or_else(
2729            || {
2730                let mut s_bytes = [0u8; 16];
2731                let digest = spg_crypto::hash(s.name.as_bytes());
2732                s_bytes.copy_from_slice(&digest[..16]);
2733                s_bytes
2734            },
2735            |f| f(),
2736        );
2737        self.users
2738            .create(&s.name, &s.password, role, salt)
2739            .map_err(|e| EngineError::Unsupported(alloc::format!("CREATE USER: {e}")))?;
2740        Ok(QueryResult::CommandOk {
2741            affected: 1,
2742            modified_catalog: true,
2743        })
2744    }
2745
2746    fn exec_drop_user(&mut self, name: &str) -> Result<QueryResult, EngineError> {
2747        if self.in_transaction() {
2748            return Err(EngineError::Unsupported(
2749                "DROP USER is not allowed inside a transaction".into(),
2750            ));
2751        }
2752        self.users
2753            .drop(name)
2754            .map_err(|e| EngineError::Unsupported(alloc::format!("DROP USER: {e}")))?;
2755        Ok(QueryResult::CommandOk {
2756            affected: 1,
2757            modified_catalog: true,
2758        })
2759    }
2760
2761    /// v7.12.4 — `CREATE [OR REPLACE] FUNCTION`. Stores the
2762    /// function metadata in the catalog. PL/pgSQL bodies are
2763    /// already parsed by the SQL parser; we re-canonicalise the
2764    /// body to source text for storage (the executor re-parses
2765    /// it at trigger fire time — see the trigger fire path).
2766    fn exec_create_function(
2767        &mut self,
2768        s: spg_sql::ast::CreateFunctionStatement,
2769    ) -> Result<QueryResult, EngineError> {
2770        let args_repr = render_function_args(&s.args);
2771        let returns = match &s.returns {
2772            spg_sql::ast::FunctionReturn::Trigger => alloc::string::String::from("TRIGGER"),
2773            spg_sql::ast::FunctionReturn::Void => alloc::string::String::from("VOID"),
2774            spg_sql::ast::FunctionReturn::Type(t) => alloc::format!("{t}"),
2775            spg_sql::ast::FunctionReturn::Other(s) => s.clone(),
2776        };
2777        let body_text = match &s.body {
2778            spg_sql::ast::FunctionBody::PlPgSql(b) => alloc::format!("{b}"),
2779            spg_sql::ast::FunctionBody::Raw(s) => s.clone(),
2780        };
2781        let def = spg_storage::FunctionDef {
2782            name: s.name.clone(),
2783            args_repr,
2784            returns,
2785            language: s.language.clone(),
2786            body: body_text,
2787        };
2788        self.active_catalog_mut()
2789            .create_function(def, s.or_replace)
2790            .map_err(EngineError::Storage)?;
2791        Ok(QueryResult::CommandOk {
2792            affected: 0,
2793            modified_catalog: true,
2794        })
2795    }
2796
2797    /// v7.12.4 — `CREATE [OR REPLACE] TRIGGER`. The referenced
2798    /// function must already exist in the catalog (forward
2799    /// references defer to a later release). Persists the
2800    /// trigger metadata for the row-write hooks below to consult.
2801    fn exec_create_trigger(
2802        &mut self,
2803        s: spg_sql::ast::CreateTriggerStatement,
2804    ) -> Result<QueryResult, EngineError> {
2805        let timing = match s.timing {
2806            spg_sql::ast::TriggerTiming::Before => "BEFORE",
2807            spg_sql::ast::TriggerTiming::After => "AFTER",
2808            spg_sql::ast::TriggerTiming::InsteadOf => "INSTEAD OF",
2809        };
2810        let events: Vec<alloc::string::String> = s
2811            .events
2812            .iter()
2813            .map(|e| match e {
2814                spg_sql::ast::TriggerEvent::Insert => alloc::string::String::from("INSERT"),
2815                spg_sql::ast::TriggerEvent::Update => alloc::string::String::from("UPDATE"),
2816                spg_sql::ast::TriggerEvent::Delete => alloc::string::String::from("DELETE"),
2817                spg_sql::ast::TriggerEvent::Truncate => alloc::string::String::from("TRUNCATE"),
2818            })
2819            .collect();
2820        let for_each = match s.for_each {
2821            spg_sql::ast::TriggerForEach::Row => "ROW",
2822            spg_sql::ast::TriggerForEach::Statement => "STATEMENT",
2823        };
2824        let def = spg_storage::TriggerDef {
2825            name: s.name.clone(),
2826            table: s.table.clone(),
2827            timing: alloc::string::String::from(timing),
2828            events,
2829            for_each: alloc::string::String::from(for_each),
2830            function: s.function.clone(),
2831            update_columns: s.update_columns.clone(),
2832            // v7.16.1 — every trigger is born enabled. Toggled
2833            // by ALTER TABLE … { ENABLE | DISABLE } TRIGGER.
2834            enabled: true,
2835        };
2836        self.active_catalog_mut()
2837            .create_trigger(def, s.or_replace)
2838            .map_err(EngineError::Storage)?;
2839        Ok(QueryResult::CommandOk {
2840            affected: 0,
2841            modified_catalog: true,
2842        })
2843    }
2844
2845    fn exec_drop_trigger(
2846        &mut self,
2847        name: &str,
2848        table: &str,
2849        if_exists: bool,
2850    ) -> Result<QueryResult, EngineError> {
2851        let removed = self.active_catalog_mut().drop_trigger(name, table);
2852        if !removed && !if_exists {
2853            return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
2854                alloc::format!("trigger {name:?} on {table:?} does not exist"),
2855            )));
2856        }
2857        Ok(QueryResult::CommandOk {
2858            affected: usize::from(removed),
2859            modified_catalog: removed,
2860        })
2861    }
2862
2863    fn exec_drop_function(
2864        &mut self,
2865        name: &str,
2866        if_exists: bool,
2867    ) -> Result<QueryResult, EngineError> {
2868        let removed = self.active_catalog_mut().drop_function(name);
2869        if !removed && !if_exists {
2870            return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
2871                alloc::format!("function {name:?} does not exist"),
2872            )));
2873        }
2874        Ok(QueryResult::CommandOk {
2875            affected: usize::from(removed),
2876            modified_catalog: removed,
2877        })
2878    }
2879
2880    /// v4.4 `UPDATE <table> SET col = expr [, ...] [WHERE cond]`.
2881    /// Filter pass uses the same WHERE eval as `exec_select`. Per
2882    /// matched row, evaluate each RHS expression against the *old*
2883    /// row, then call `Table::update_row` which rebuilds indices.
2884    /// Indexed columns are correctly reflected because rebuild
2885    /// happens after the cell rewrite.
2886    fn exec_update_cancel(
2887        &mut self,
2888        stmt: &spg_sql::ast::UpdateStatement,
2889        cancel: CancelToken<'_>,
2890    ) -> Result<QueryResult, EngineError> {
2891        // v7.12.5 — snapshot BEFORE/AFTER UPDATE row triggers + the
2892        // session FTS config before the table mut-borrow opens (the
2893        // INSERT path uses the same pattern). Empty vecs are the
2894        // common "no triggers on this table" fast path.
2895        // v7.13.0 — UPDATE triggers carry an optional `UPDATE OF
2896        // cols` filter. The filter is paired with each function so
2897        // the per-row fire loop can skip when no listed column
2898        // actually differs between OLD and NEW.
2899        let before_update_triggers = self.snapshot_update_row_triggers(&stmt.table, "BEFORE");
2900        let after_update_triggers = self.snapshot_update_row_triggers(&stmt.table, "AFTER");
2901        let trigger_session_cfg: Option<String> = self
2902            .session_params
2903            .get("default_text_search_config")
2904            .cloned();
2905        // v5.2.3: if the WHERE is a PK equality and matches a cold-
2906        // tier row, promote it back to the hot tier *before* the
2907        // hot-row walk. The promote pushes the row to the end of
2908        // `table.rows`, where the upcoming SET-evaluation loop will
2909        // pick it up and apply the assignments. Lookups for the key
2910        // never observe a gap because `promote_cold_row` inserts the
2911        // hot row before retiring the cold locator.
2912        if let Some(w) = &stmt.where_ {
2913            let schema_cols = self
2914                .active_catalog()
2915                .get(&stmt.table)
2916                .ok_or_else(|| {
2917                    EngineError::Storage(StorageError::TableNotFound {
2918                        name: stmt.table.clone(),
2919                    })
2920                })?
2921                .schema()
2922                .columns
2923                .clone();
2924            if let Some((col_pos, key)) = try_pk_predicate(w, &schema_cols, stmt.table.as_str())
2925                && let Some(idx_name) = self
2926                    .active_catalog()
2927                    .get(&stmt.table)
2928                    .and_then(|t| t.index_on(col_pos).map(|i| i.name.clone()))
2929            {
2930                // Promote may be a no-op (key is hot-only or absent);
2931                // we don't care about the return value here — the
2932                // subsequent hot walk will either match or not.
2933                let _ = self
2934                    .active_catalog_mut()
2935                    .promote_cold_row(&stmt.table, &idx_name, &key);
2936            }
2937        }
2938
2939        // v7.12.1 — cache session FTS config before the table
2940        // mut-borrow (same reason as exec_delete).
2941        let ts_cfg: Option<String> = self
2942            .session_param("default_text_search_config")
2943            .map(String::from);
2944        let table = self
2945            .active_catalog_mut()
2946            .get_mut(&stmt.table)
2947            .ok_or_else(|| {
2948                EngineError::Storage(StorageError::TableNotFound {
2949                    name: stmt.table.clone(),
2950                })
2951            })?;
2952        let schema_cols: Vec<ColumnSchema> = table.schema().columns.clone();
2953        // Resolve each SET target to a column position once, validate
2954        // up front so a typo'd column doesn't leave a partial mutation
2955        // behind.
2956        let mut targets: Vec<(usize, &Expr)> = Vec::with_capacity(stmt.assignments.len());
2957        for (col, expr) in &stmt.assignments {
2958            let pos = schema_cols
2959                .iter()
2960                .position(|c| c.name == *col)
2961                .ok_or_else(|| {
2962                    EngineError::Eval(EvalError::ColumnNotFound { name: col.clone() })
2963                })?;
2964            targets.push((pos, expr));
2965        }
2966        let ctx = EvalContext::new(&schema_cols, Some(stmt.table.as_str()))
2967            .with_default_text_search_config(ts_cfg.as_deref());
2968        // Walk every row, evaluate WHERE then SET expressions. We
2969        // gather (position, new_values) tuples first and apply them
2970        // afterwards so the WHERE/RHS evaluation reads the original
2971        // row state — matches PG semantics (UPDATE doesn't see its
2972        // own writes).
2973        let mut planned: Vec<(usize, Vec<Value>)> = Vec::new();
2974        for (i, row) in table.rows().iter().enumerate() {
2975            // v4.5: cooperative cancel checkpoint every 256 rows so
2976            // a runaway UPDATE without WHERE doesn't drag past the
2977            // server's query-timeout watchdog.
2978            if i.is_multiple_of(256) {
2979                cancel.check()?;
2980            }
2981            if let Some(w) = &stmt.where_ {
2982                let cond = eval::eval_expr(w, row, &ctx)?;
2983                if !matches!(cond, Value::Bool(true)) {
2984                    continue;
2985                }
2986            }
2987            let mut new_vals = row.values.clone();
2988            for (pos, expr) in &targets {
2989                let v = eval::eval_expr(expr, row, &ctx)?;
2990                new_vals[*pos] =
2991                    coerce_value(v, schema_cols[*pos].ty, &schema_cols[*pos].name, *pos)?;
2992            }
2993            planned.push((i, new_vals));
2994        }
2995        // v7.6.6 — capture pre-update row values for the FK
2996        // enforcement passes below. `planned` carries new values
2997        // only; pair them with the old row.
2998        let plan_with_old: Vec<(usize, Vec<Value>, Vec<Value>)> = planned
2999            .iter()
3000            .map(|(pos, new_vals)| (*pos, table.rows()[*pos].values.clone(), new_vals.clone()))
3001            .collect();
3002        let self_fks = table.schema().foreign_keys.clone();
3003        // v7.12.5 — `affected` is computed post-BEFORE-trigger
3004        // below (triggers may RETURN NULL to skip individual
3005        // rows). The pre-trigger len shape is no longer accurate.
3006        // Release mutable borrow on `table` for the FK passes.
3007        let _ = table;
3008        // v7.6.6 — Stage 2a: outbound FK check. For every row whose
3009        // local FK columns changed, the new value must exist in the
3010        // parent.
3011        if !self_fks.is_empty() {
3012            let new_rows: Vec<Vec<Value>> = planned
3013                .iter()
3014                .map(|(_pos, new_vals)| new_vals.clone())
3015                .collect();
3016            enforce_fk_inserts(self.active_catalog(), &stmt.table, &self_fks, &new_rows)?;
3017        }
3018        // v7.13.0 — CHECK constraint enforcement on UPDATE
3019        // (mailrs round-5 G3). Predicates evaluated against the
3020        // candidate post-UPDATE row; false rejects the UPDATE.
3021        {
3022            let new_rows: Vec<Vec<Value>> = planned
3023                .iter()
3024                .map(|(_pos, new_vals)| new_vals.clone())
3025                .collect();
3026            enforce_check_constraints(self.active_catalog(), &stmt.table, &new_rows)?;
3027        }
3028        // v7.6.6 — Stage 2b: inbound FK check. For every row that
3029        // changed value in a column that *some other table* uses as
3030        // a FK parent column, react per `on_update` action.
3031        let child_plan =
3032            plan_fk_parent_updates(self.active_catalog(), &stmt.table, &plan_with_old)?;
3033        // Stage 3a — apply each child-side action.
3034        for step in &child_plan {
3035            apply_fk_child_step(self.active_catalog_mut(), step)?;
3036        }
3037        // Stage 3b — apply the original UPDATE.
3038        let table = self
3039            .active_catalog_mut()
3040            .get_mut(&stmt.table)
3041            .ok_or_else(|| {
3042                EngineError::Storage(StorageError::TableNotFound {
3043                    name: stmt.table.clone(),
3044                })
3045            })?;
3046        // v7.12.5 — fire BEFORE/AFTER UPDATE row-level triggers
3047        // around the apply loop. BEFORE sees NEW=candidate +
3048        // OLD=current; may rewrite NEW or RETURN NULL to skip.
3049        // AFTER sees NEW=post-write + OLD=pre-write (both read-
3050        // only).
3051        //
3052        // Filter `planned` through the BEFORE pass first so the
3053        // RETURNING snapshot reflects what actually got written
3054        // (triggers may rewrite cells, including a cancellation).
3055        let mut applied_after_before: Vec<(usize, Row, Row)> = Vec::with_capacity(planned.len());
3056        // v7.12.7 — embedded SQL queue.
3057        let mut deferred_embedded: Vec<triggers::DeferredEmbeddedStmt> = Vec::new();
3058        for (pos, new_vals) in &planned {
3059            let old_row = table.rows()[*pos].clone();
3060            let mut new_row = Row::new(new_vals.clone());
3061            let mut skip = false;
3062            for (fd, filter) in &before_update_triggers {
3063                // v7.13.0 — `UPDATE OF cols` filter (mailrs round-5
3064                // G7). Skip this trigger when the filter is set and
3065                // no listed column actually differs between OLD and
3066                // NEW for this row.
3067                if !filter.is_empty()
3068                    && !any_column_changed(filter, &schema_cols, &old_row, &new_row)
3069                {
3070                    continue;
3071                }
3072                let (outcome, deferred) = triggers::fire_row_trigger(
3073                    fd,
3074                    Some(new_row.clone()),
3075                    Some(&old_row),
3076                    &stmt.table,
3077                    &schema_cols,
3078                    &[],
3079                    trigger_session_cfg.as_deref(),
3080                    false,
3081                )
3082                .map_err(|e| EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}"))))?;
3083                deferred_embedded.extend(deferred);
3084                match outcome {
3085                    triggers::TriggerOutcome::Row(r) => new_row = r,
3086                    triggers::TriggerOutcome::Skip => {
3087                        skip = true;
3088                        break;
3089                    }
3090                }
3091            }
3092            if !skip {
3093                applied_after_before.push((*pos, new_row, old_row));
3094            }
3095        }
3096        // v7.9.4 — snapshot post-update values for RETURNING (post-
3097        // BEFORE-trigger because triggers can rewrite cells).
3098        let updated_for_returning: Vec<Vec<Value>> = if stmt.returning.is_some() {
3099            applied_after_before
3100                .iter()
3101                .map(|(_pos, new_row, _old)| new_row.values.clone())
3102                .collect()
3103        } else {
3104            Vec::new()
3105        };
3106        let affected = applied_after_before.len();
3107        // Apply, then fire AFTER triggers per row. AFTER runs read-
3108        // only against the freshly-written row; v7.12.4-shape
3109        // assignment errors with a clear message.
3110        for (pos, new_row, old_row) in applied_after_before {
3111            table.update_row(pos, new_row.values.clone())?;
3112            for (fd, filter) in &after_update_triggers {
3113                if !filter.is_empty()
3114                    && !any_column_changed(filter, &schema_cols, &old_row, &new_row)
3115                {
3116                    continue;
3117                }
3118                let (_outcome, deferred) = triggers::fire_row_trigger(
3119                    fd,
3120                    Some(new_row.clone()),
3121                    Some(&old_row),
3122                    &stmt.table,
3123                    &schema_cols,
3124                    &[],
3125                    trigger_session_cfg.as_deref(),
3126                    true,
3127                )
3128                .map_err(|e| EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}"))))?;
3129                deferred_embedded.extend(deferred);
3130            }
3131        }
3132        let _ = table;
3133        // v7.12.7 — drain trigger-emitted embedded SQL for this UPDATE.
3134        self.execute_deferred_trigger_stmts(deferred_embedded, cancel)?;
3135        // v6.2.1 — auto-analyze modified-row tracking for UPDATE.
3136        if !self.in_transaction() && affected > 0 {
3137            self.statistics
3138                .record_modifications(&stmt.table, affected as u64);
3139        }
3140        // v7.9.4 — RETURNING projection.
3141        if let Some(items) = &stmt.returning {
3142            return self.build_returning_rows(&stmt.table, items, updated_for_returning);
3143        }
3144        Ok(QueryResult::CommandOk {
3145            affected,
3146            modified_catalog: !self.in_transaction(),
3147        })
3148    }
3149
3150    /// v4.4 `DELETE FROM <table> [WHERE cond]`. Collects matching
3151    /// positions then delegates to `Table::delete_rows` (single index
3152    /// rebuild for the batch).
3153    fn exec_delete_cancel(
3154        &mut self,
3155        stmt: &spg_sql::ast::DeleteStatement,
3156        cancel: CancelToken<'_>,
3157    ) -> Result<QueryResult, EngineError> {
3158        // v7.12.5 — snapshot BEFORE/AFTER DELETE row triggers + the
3159        // session FTS config before the mut borrow (same shape as
3160        // INSERT / UPDATE).
3161        let before_delete_triggers = self.snapshot_row_triggers(&stmt.table, "DELETE", "BEFORE");
3162        let after_delete_triggers = self.snapshot_row_triggers(&stmt.table, "DELETE", "AFTER");
3163        let trigger_session_cfg: Option<String> = self
3164            .session_params
3165            .get("default_text_search_config")
3166            .cloned();
3167        // v5.2.3: PK-targeted DELETE → first retire any cold-tier
3168        // locator for the key. The cold row body stays in the
3169        // segment (becoming shadowed garbage that a future
3170        // compaction pass reclaims) but the index no longer
3171        // resolves it. The shadow count contributes to the
3172        // affected total; the subsequent hot walk handles any hot
3173        // rows for the same key.
3174        let mut cold_shadow_count: usize = 0;
3175        if let Some(w) = &stmt.where_ {
3176            let schema_cols = self
3177                .active_catalog()
3178                .get(&stmt.table)
3179                .ok_or_else(|| {
3180                    EngineError::Storage(StorageError::TableNotFound {
3181                        name: stmt.table.clone(),
3182                    })
3183                })?
3184                .schema()
3185                .columns
3186                .clone();
3187            if let Some((col_pos, key)) = try_pk_predicate(w, &schema_cols, stmt.table.as_str())
3188                && let Some(idx_name) = self
3189                    .active_catalog()
3190                    .get(&stmt.table)
3191                    .and_then(|t| t.index_on(col_pos).map(|i| i.name.clone()))
3192            {
3193                cold_shadow_count = self
3194                    .active_catalog_mut()
3195                    .shadow_cold_row(&stmt.table, &idx_name, &key)
3196                    .unwrap_or(0);
3197            }
3198        }
3199
3200        // v7.12.1 — cache the session FTS config as an owned
3201        // String before the mutable table borrow below; the
3202        // ctx-builder then references it via `as_deref` so the
3203        // immutable read of `session_params` doesn't conflict
3204        // with the mut borrow chain.
3205        let ts_cfg: Option<String> = self
3206            .session_param("default_text_search_config")
3207            .map(String::from);
3208        let table = self
3209            .active_catalog_mut()
3210            .get_mut(&stmt.table)
3211            .ok_or_else(|| {
3212                EngineError::Storage(StorageError::TableNotFound {
3213                    name: stmt.table.clone(),
3214                })
3215            })?;
3216        let schema_cols: Vec<ColumnSchema> = table.schema().columns.clone();
3217        let ctx = EvalContext::new(&schema_cols, Some(stmt.table.as_str()))
3218            .with_default_text_search_config(ts_cfg.as_deref());
3219        let mut positions: Vec<usize> = Vec::new();
3220        // v7.6.3 — collect every to-delete row's full Value tuple
3221        // alongside its position, so the FK enforcement pass can
3222        // run after the mut borrow drops.
3223        let mut to_delete_rows: Vec<Vec<Value>> = Vec::new();
3224        for (i, row) in table.rows().iter().enumerate() {
3225            if i.is_multiple_of(256) {
3226                cancel.check()?;
3227            }
3228            let keep = if let Some(w) = &stmt.where_ {
3229                let cond = eval::eval_expr(w, row, &ctx)?;
3230                !matches!(cond, Value::Bool(true))
3231            } else {
3232                false
3233            };
3234            if !keep {
3235                positions.push(i);
3236                to_delete_rows.push(row.values.clone());
3237            }
3238        }
3239        // v7.6.3 / v7.6.4 — Stage 2: FK enforcement on the immutable
3240        // catalog. Release the mut borrow and run reverse-scan
3241        // against every child table whose FK targets this table.
3242        // RESTRICT / NoAction raise an error; CASCADE returns a
3243        // cascade plan that stage 3 applies after the primary delete.
3244        // SET NULL / SET DEFAULT remain Unsupported until v7.6.5.
3245        let _ = table;
3246        // v7.12.5 — BEFORE DELETE row-level triggers. Each fires
3247        // with NEW=None / OLD=pre-delete row; RETURN OLD (or NEW)
3248        // = proceed, RETURN NULL = skip the row entirely. The
3249        // filter must run BEFORE the FK cascade plan so cascaded
3250        // child rows track the trigger's skip-decision on the
3251        // parent.
3252        // v7.12.7 — embedded SQL queue.
3253        let mut deferred_embedded: Vec<triggers::DeferredEmbeddedStmt> = Vec::new();
3254        if !before_delete_triggers.is_empty() {
3255            let mut filtered_positions: Vec<usize> = Vec::with_capacity(positions.len());
3256            let mut filtered_old_rows: Vec<Vec<Value>> = Vec::with_capacity(to_delete_rows.len());
3257            for (pos, old_vals) in positions.iter().zip(to_delete_rows.iter()) {
3258                let old_row = Row::new(old_vals.clone());
3259                let mut cancel_this = false;
3260                for fd in &before_delete_triggers {
3261                    let (outcome, deferred) = triggers::fire_row_trigger(
3262                        fd,
3263                        None,
3264                        Some(&old_row),
3265                        &stmt.table,
3266                        &schema_cols,
3267                        &[],
3268                        trigger_session_cfg.as_deref(),
3269                        false,
3270                    )
3271                    .map_err(|e| {
3272                        EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}")))
3273                    })?;
3274                    deferred_embedded.extend(deferred);
3275                    if matches!(outcome, triggers::TriggerOutcome::Skip) {
3276                        cancel_this = true;
3277                        break;
3278                    }
3279                }
3280                if !cancel_this {
3281                    filtered_positions.push(*pos);
3282                    filtered_old_rows.push(old_vals.clone());
3283                }
3284            }
3285            positions = filtered_positions;
3286            to_delete_rows = filtered_old_rows;
3287        }
3288        let cascade_plan = plan_fk_parent_deletions(
3289            self.active_catalog(),
3290            &stmt.table,
3291            &positions,
3292            &to_delete_rows,
3293        )?;
3294        // Stage 3a — apply each FK child step (SET NULL / SET
3295        // DEFAULT / CASCADE delete) before deleting the parent.
3296        // The plan is already ordered: nulls/defaults first, then
3297        // cascade deletes (so a row mutated and later deleted
3298        // surfaces as deleted — though v7.6.5 doesn't produce
3299        // that overlap today).
3300        for step in &cascade_plan {
3301            apply_fk_child_step(self.active_catalog_mut(), step)?;
3302        }
3303        // Stage 3b — actually delete the original target rows.
3304        let table = self
3305            .active_catalog_mut()
3306            .get_mut(&stmt.table)
3307            .ok_or_else(|| {
3308                EngineError::Storage(StorageError::TableNotFound {
3309                    name: stmt.table.clone(),
3310                })
3311            })?;
3312        let affected = table.delete_rows(&positions) + cold_shadow_count;
3313        let _ = table;
3314        // v7.12.5 — AFTER DELETE row-level triggers fire post-write
3315        // with NEW=None / OLD=pre-delete row (each from the
3316        // already-snapshotted to_delete_rows). Return value is
3317        // ignored (matches PG AFTER semantics).
3318        if !after_delete_triggers.is_empty() {
3319            for old_vals in &to_delete_rows {
3320                let old_row = Row::new(old_vals.clone());
3321                for fd in &after_delete_triggers {
3322                    let (_outcome, deferred) = triggers::fire_row_trigger(
3323                        fd,
3324                        None,
3325                        Some(&old_row),
3326                        &stmt.table,
3327                        &schema_cols,
3328                        &[],
3329                        trigger_session_cfg.as_deref(),
3330                        true,
3331                    )
3332                    .map_err(|e| {
3333                        EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}")))
3334                    })?;
3335                    deferred_embedded.extend(deferred);
3336                }
3337            }
3338        }
3339        // v7.12.7 — drain trigger-emitted embedded SQL for this DELETE.
3340        self.execute_deferred_trigger_stmts(deferred_embedded, cancel)?;
3341        // v6.2.1 — auto-analyze modified-row tracking for DELETE.
3342        if !self.in_transaction() && affected > 0 {
3343            self.statistics
3344                .record_modifications(&stmt.table, affected as u64);
3345        }
3346        // v7.9.4 — RETURNING projection over the soon-to-be-gone
3347        // rows. `to_delete_rows` was snapshotted in stage 1 before
3348        // mutation, so the projection sees the pre-delete state
3349        // (matches PG semantics: DELETE RETURNING returns the row
3350        // as it was just before removal).
3351        if let Some(items) = &stmt.returning {
3352            return self.build_returning_rows(&stmt.table, items, to_delete_rows);
3353        }
3354        Ok(QueryResult::CommandOk {
3355            affected,
3356            modified_catalog: !self.in_transaction(),
3357        })
3358    }
3359
3360    /// `SHOW TABLES` — one row per table in the active catalog.
3361    /// Column name is `name` so result-set consumers can downstream
3362    /// `SELECT name FROM ...` style logic if needed.
3363    /// v4.26: `EXPLAIN [ANALYZE] <select>`. Returns a single-column
3364    /// `QUERY PLAN` text table — first line names the top operator
3365    /// (Scan / Aggregate / Window / etc.), indented children list
3366    /// FROM joins, WHERE filters, ORDER BY / LIMIT, projection
3367    /// shape, and any active index hits. `ANALYZE` execs the inner
3368    /// SELECT and appends actual-row + elapsed-micros annotations.
3369    #[allow(clippy::format_push_string)]
3370    fn exec_explain(
3371        &self,
3372        e: &spg_sql::ast::ExplainStatement,
3373        cancel: CancelToken<'_>,
3374    ) -> Result<QueryResult, EngineError> {
3375        let mut lines = Vec::<String>::new();
3376        explain_select(&e.inner, self, 0, &mut lines);
3377        if e.suggest {
3378            // v6.8.3 — index advisor. Walks the SELECT's FROM
3379            // tables + WHERE column refs; for each (table, column)
3380            // pair that lacks an index, append a SUGGEST line with
3381            // a copy-pastable `CREATE INDEX` statement. This is a
3382            // pure-syntax heuristic — no cardinality estimation —
3383            // matching the v6.8.3 design intent of "tell the
3384            // operator where indexes are missing", not "give the
3385            // mathematically optimal index set".
3386            let suggestions = build_index_suggestions(&e.inner, self);
3387            for s in suggestions {
3388                lines.push(s);
3389            }
3390        } else if e.analyze {
3391            // v6.2.4 — EXPLAIN ANALYZE annotates each operator line
3392            // with `(rows=N)` where the row count is computable
3393            // without re-executing the full query:
3394            //   - Top-level operator (first non-indented line):
3395            //     rows = final result.len()
3396            //   - "From: <table> [full scan]" lines: rows =
3397            //     table.rows().len() (catalog read; no execution)
3398            //   - "From: <table> [index seek]": indeterminate —
3399            //     the index step would need re-execution; v6.2.5
3400            //     adds per-operator wall-clock + hot/cold rows
3401            //     instrumentation that makes this concrete.
3402            //   - Everything else: marked `(—)` so the surface
3403            //     stays well-defined without silently dropping
3404            //     stats. v6.2.5 fills in via inline executor
3405            //     instrumentation.
3406            // Total elapsed lands on a trailing `Total: …` line.
3407            let started = self.clock.map(|f| f());
3408            let exec = self.exec_select_cancel(&e.inner, cancel)?;
3409            let elapsed_micros = match (self.clock, started) {
3410                (Some(f), Some(s)) => Some(f().saturating_sub(s)),
3411                _ => None,
3412            };
3413            let row_count = if let QueryResult::Rows { rows, .. } = &exec {
3414                rows.len()
3415            } else {
3416                0
3417            };
3418            annotate_explain_lines(&mut lines, row_count, self);
3419            let mut total = alloc::format!("Total: rows={row_count}");
3420            if let Some(us) = elapsed_micros {
3421                total.push_str(&alloc::format!(" elapsed={us}us"));
3422            }
3423            lines.push(total);
3424        }
3425        let columns = alloc::vec![ColumnSchema::new("QUERY PLAN", DataType::Text, false)];
3426        let rows: Vec<Row> = lines
3427            .into_iter()
3428            .map(|l| Row::new(alloc::vec![Value::Text(l)]))
3429            .collect();
3430        Ok(QueryResult::Rows { columns, rows })
3431    }
3432
3433    fn exec_show_tables(&self) -> QueryResult {
3434        let columns = alloc::vec![ColumnSchema::new("name", DataType::Text, false)];
3435        let rows: Vec<Row> = self
3436            .active_catalog()
3437            .table_names()
3438            .into_iter()
3439            .map(|n| Row::new(alloc::vec![Value::Text(n)]))
3440            .collect();
3441        QueryResult::Rows { columns, rows }
3442    }
3443
3444    /// `SHOW COLUMNS FROM <table>` — one row per column with the
3445    /// declared name, SQL type rendering, and nullability flag.
3446    fn exec_show_columns(&self, table_name: &str) -> Result<QueryResult, EngineError> {
3447        let table =
3448            self.active_catalog()
3449                .get(table_name)
3450                .ok_or_else(|| StorageError::TableNotFound {
3451                    name: table_name.into(),
3452                })?;
3453        let columns = alloc::vec![
3454            ColumnSchema::new("name", DataType::Text, false),
3455            ColumnSchema::new("type", DataType::Text, false),
3456            ColumnSchema::new("nullable", DataType::Bool, false),
3457        ];
3458        let rows: Vec<Row> = table
3459            .schema()
3460            .columns
3461            .iter()
3462            .map(|c| {
3463                Row::new(alloc::vec![
3464                    Value::Text(c.name.clone()),
3465                    Value::Text(alloc::format!("{}", c.ty)),
3466                    Value::Bool(c.nullable),
3467                ])
3468            })
3469            .collect();
3470        Ok(QueryResult::Rows { columns, rows })
3471    }
3472
3473    fn exec_begin(&mut self) -> Result<QueryResult, EngineError> {
3474        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
3475        if self.tx_catalogs.contains_key(&tx_id) {
3476            return Err(EngineError::TransactionAlreadyOpen);
3477        }
3478        self.tx_catalogs.insert(
3479            tx_id,
3480            TxState {
3481                catalog: self.catalog.clone(),
3482                savepoints: Vec::new(),
3483            },
3484        );
3485        Ok(QueryResult::CommandOk {
3486            affected: 0,
3487            modified_catalog: false,
3488        })
3489    }
3490
3491    fn exec_commit(&mut self) -> Result<QueryResult, EngineError> {
3492        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
3493        let state = self
3494            .tx_catalogs
3495            .remove(&tx_id)
3496            .ok_or(EngineError::NoActiveTransaction)?;
3497        self.catalog = state.catalog;
3498        // All savepoints become permanent at COMMIT and the stack
3499        // resets for the next TX (`state.savepoints` is discarded with
3500        // `state`).
3501        Ok(QueryResult::CommandOk {
3502            affected: 0,
3503            modified_catalog: true,
3504        })
3505    }
3506
3507    fn exec_rollback(&mut self) -> Result<QueryResult, EngineError> {
3508        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
3509        if self.tx_catalogs.remove(&tx_id).is_none() {
3510            return Err(EngineError::NoActiveTransaction);
3511        }
3512        // savepoints discarded with the TxState
3513        Ok(QueryResult::CommandOk {
3514            affected: 0,
3515            modified_catalog: false,
3516        })
3517    }
3518
3519    fn exec_savepoint(&mut self, name: String) -> Result<QueryResult, EngineError> {
3520        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
3521        let state = self
3522            .tx_catalogs
3523            .get_mut(&tx_id)
3524            .ok_or(EngineError::NoActiveTransaction)?;
3525        // PG re-uses an existing savepoint name by dropping the older
3526        // entry and pushing a fresh one — match that behaviour so
3527        // application code can `SAVEPOINT sp; ...; SAVEPOINT sp` freely.
3528        state.savepoints.retain(|(n, _)| n != &name);
3529        let snapshot = state.catalog.clone();
3530        state.savepoints.push((name, snapshot));
3531        Ok(QueryResult::CommandOk {
3532            affected: 0,
3533            modified_catalog: false,
3534        })
3535    }
3536
3537    fn exec_rollback_to_savepoint(&mut self, name: &str) -> Result<QueryResult, EngineError> {
3538        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
3539        let state = self
3540            .tx_catalogs
3541            .get_mut(&tx_id)
3542            .ok_or(EngineError::NoActiveTransaction)?;
3543        let pos = state
3544            .savepoints
3545            .iter()
3546            .rposition(|(n, _)| n == name)
3547            .ok_or_else(|| {
3548                EngineError::Unsupported(alloc::format!("savepoint not found: {name}"))
3549            })?;
3550        // The savepoint stays on the stack (PG semantics): a later
3551        // `RELEASE` or further `ROLLBACK TO` is still allowed. Everything
3552        // after it is discarded.
3553        let snapshot = state.savepoints[pos].1.clone();
3554        state.savepoints.truncate(pos + 1);
3555        state.catalog = snapshot;
3556        Ok(QueryResult::CommandOk {
3557            affected: 0,
3558            modified_catalog: false,
3559        })
3560    }
3561
3562    fn exec_release_savepoint(&mut self, name: &str) -> Result<QueryResult, EngineError> {
3563        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
3564        let state = self
3565            .tx_catalogs
3566            .get_mut(&tx_id)
3567            .ok_or(EngineError::NoActiveTransaction)?;
3568        let pos = state
3569            .savepoints
3570            .iter()
3571            .rposition(|(n, _)| n == name)
3572            .ok_or_else(|| {
3573                EngineError::Unsupported(alloc::format!("savepoint not found: {name}"))
3574            })?;
3575        // RELEASE keeps the work since the savepoint, just discards the
3576        // bookmark plus everything nested under it.
3577        state.savepoints.truncate(pos);
3578        Ok(QueryResult::CommandOk {
3579            affected: 0,
3580            modified_catalog: false,
3581        })
3582    }
3583
3584    /// v6.0.4 — synchronous `ALTER INDEX <name> REBUILD [WITH
3585    /// (encoding = …)]`. Walks every table in the active catalog
3586    /// looking for an index matching `stmt.name`, then delegates the
3587    /// rebuild (including any encoding switch) to
3588    /// `Table::rebuild_nsw_index`. The "live" non-blocking
3589    /// optimisation is v6.0.4.1 / v6.1.x territory.
3590    /// v6.7.2 — `ALTER TABLE t SET hot_tier_bytes = X`. Dispatch
3591    /// arm. Currently the only setting is `hot_tier_bytes`; later
3592    /// v6.7.x can extend `AlterTableTarget` without touching this
3593    /// arm structure.
3594    fn exec_alter_table(
3595        &mut self,
3596        s: spg_sql::ast::AlterTableStatement,
3597    ) -> Result<QueryResult, EngineError> {
3598        // v7.13.2 — mailrs round-6 S1: apply each subaction in order.
3599        // On first error the statement aborts; subactions already
3600        // applied stay (no transactional rollback in v7.13 — wrap in
3601        // BEGIN/COMMIT if atomicity matters).
3602        let table_name = s.name.clone();
3603        for target in s.targets {
3604            self.exec_alter_table_subaction(&table_name, target)?;
3605        }
3606        Ok(QueryResult::CommandOk {
3607            affected: 0,
3608            modified_catalog: !self.in_transaction(),
3609        })
3610    }
3611
3612    fn exec_alter_table_subaction(
3613        &mut self,
3614        table_name_outer: &str,
3615        target: spg_sql::ast::AlterTableTarget,
3616    ) -> Result<(), EngineError> {
3617        // Inner helper retains the s.name closure shape; alias to `s`
3618        // for minimal diff against the v7.13.0 body.
3619        struct S<'a> {
3620            name: &'a str,
3621        }
3622        let s = S {
3623            name: table_name_outer,
3624        };
3625        match target {
3626            spg_sql::ast::AlterTableTarget::SetHotTierBytes(n) => {
3627                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
3628                    EngineError::Storage(StorageError::TableNotFound {
3629                        name: s.name.into(),
3630                    })
3631                })?;
3632                table.schema_mut().hot_tier_bytes = Some(n);
3633            }
3634            spg_sql::ast::AlterTableTarget::AddForeignKey(fk) => {
3635                // v7.6.8 — resolve FK against the live catalog first
3636                // (validates parent table, columns, indices). Then
3637                // verify every existing row in the child table
3638                // satisfies the new constraint. Then install it.
3639                let cols_snapshot = self
3640                    .active_catalog()
3641                    .get(s.name)
3642                    .ok_or_else(|| {
3643                        EngineError::Storage(StorageError::TableNotFound {
3644                            name: s.name.into(),
3645                        })
3646                    })?
3647                    .schema()
3648                    .columns
3649                    .clone();
3650                let storage_fk =
3651                    resolve_foreign_key(s.name, &cols_snapshot, fk, self.active_catalog())?;
3652                // Verify existing rows. Treat them as a virtual
3653                // INSERT batch — reusing the v7.6.2 enforce helper.
3654                let existing_rows: Vec<Vec<Value>> = self
3655                    .active_catalog()
3656                    .get(s.name)
3657                    .expect("checked above")
3658                    .rows()
3659                    .iter()
3660                    .map(|r| r.values.clone())
3661                    .collect();
3662                enforce_fk_inserts(
3663                    self.active_catalog(),
3664                    s.name,
3665                    core::slice::from_ref(&storage_fk),
3666                    &existing_rows,
3667                )?;
3668                // Reject duplicate constraint name.
3669                let table = self
3670                    .active_catalog_mut()
3671                    .get_mut(s.name)
3672                    .expect("checked above");
3673                if let Some(name) = &storage_fk.name
3674                    && table
3675                        .schema()
3676                        .foreign_keys
3677                        .iter()
3678                        .any(|f| f.name.as_ref() == Some(name))
3679                {
3680                    return Err(EngineError::Unsupported(alloc::format!(
3681                        "ALTER TABLE ADD CONSTRAINT: a constraint named {name:?} already exists"
3682                    )));
3683                }
3684                table.schema_mut().foreign_keys.push(storage_fk);
3685            }
3686            spg_sql::ast::AlterTableTarget::DropForeignKey { name, if_exists } => {
3687                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
3688                    EngineError::Storage(StorageError::TableNotFound {
3689                        name: s.name.into(),
3690                    })
3691                })?;
3692                let fks = &mut table.schema_mut().foreign_keys;
3693                let before = fks.len();
3694                fks.retain(|f| f.name.as_ref() != Some(&name));
3695                if fks.len() == before && !if_exists {
3696                    return Err(EngineError::Unsupported(alloc::format!(
3697                        "ALTER TABLE DROP CONSTRAINT: no FK named {name:?} on {:?}",
3698                        s.name
3699                    )));
3700                }
3701                // v7.13.2 mailrs round-6 S7: IF EXISTS silences the miss.
3702            }
3703            spg_sql::ast::AlterTableTarget::AddColumn {
3704                column,
3705                if_not_exists,
3706            } => {
3707                // v7.13.0 — mailrs round-5 G1. Append-only column add
3708                // with back-fill of the DEFAULT (or NULL) into every
3709                // existing row. Column positions don't shift, so we
3710                // skip index rebuild.
3711                let clock = self.clock;
3712                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
3713                    EngineError::Storage(StorageError::TableNotFound {
3714                        name: s.name.into(),
3715                    })
3716                })?;
3717                if table
3718                    .schema()
3719                    .columns
3720                    .iter()
3721                    .any(|c| c.name.eq_ignore_ascii_case(&column.name))
3722                {
3723                    if if_not_exists {
3724                        return Ok(());
3725                    }
3726                    return Err(EngineError::Unsupported(alloc::format!(
3727                        "ALTER TABLE ADD COLUMN: column {:?} already exists on {:?}",
3728                        column.name,
3729                        s.name
3730                    )));
3731                }
3732                let col_name = column.name.clone();
3733                let nullable = column.nullable;
3734                let has_default = column.default.is_some() || column.auto_increment;
3735                let col_schema = column_def_to_schema(column)?;
3736                let row_count = table.row_count();
3737                // Compute the back-fill value. Literal / runtime DEFAULT
3738                // funnels through the same resolver that INSERT uses
3739                // (v7.9.21 `resolve_column_default_free`). NULL when
3740                // the column is nullable and has no DEFAULT. NOT NULL
3741                // without DEFAULT errors when the table has existing
3742                // rows — same as PG.
3743                let fill_value: Value = if has_default || col_schema.runtime_default.is_some() {
3744                    resolve_column_default_free(&col_schema, clock)?
3745                } else if nullable || row_count == 0 {
3746                    Value::Null
3747                } else {
3748                    return Err(EngineError::Unsupported(alloc::format!(
3749                        "ALTER TABLE ADD COLUMN {col_name:?}: NOT NULL column requires DEFAULT \
3750                         when the table has existing rows"
3751                    )));
3752                };
3753                table.add_column(col_schema, fill_value);
3754            }
3755            spg_sql::ast::AlterTableTarget::AlterColumnType {
3756                column,
3757                new_type,
3758                using,
3759            } => {
3760                // v7.13.0 — mailrs round-5 G8. Re-evaluate each
3761                // row's column value (either through the USING
3762                // expression if supplied, or as a direct CAST of
3763                // the existing value) and re-coerce to the new
3764                // type. Indices on the column get rebuilt.
3765                let new_data_type = column_type_to_data_type(new_type);
3766                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
3767                    EngineError::Storage(StorageError::TableNotFound {
3768                        name: s.name.into(),
3769                    })
3770                })?;
3771                let col_pos = table
3772                    .schema()
3773                    .columns
3774                    .iter()
3775                    .position(|c| c.name.eq_ignore_ascii_case(&column))
3776                    .ok_or_else(|| {
3777                        EngineError::Unsupported(alloc::format!(
3778                            "ALTER COLUMN TYPE: column {column:?} not found on {:?}",
3779                            s.name
3780                        ))
3781                    })?;
3782                let schema_cols = table.schema().columns.clone();
3783                let ctx = eval::EvalContext::new(&schema_cols, None);
3784                let mut new_values: alloc::vec::Vec<Value> =
3785                    alloc::vec::Vec::with_capacity(table.row_count());
3786                for row in table.rows().iter() {
3787                    let raw = match &using {
3788                        Some(expr) => eval::eval_expr(expr, row, &ctx).map_err(|e| {
3789                            EngineError::Unsupported(alloc::format!(
3790                                "ALTER COLUMN TYPE: USING expression failed: {e:?}"
3791                            ))
3792                        })?,
3793                        None => row.values.get(col_pos).cloned().unwrap_or(Value::Null),
3794                    };
3795                    let coerced = coerce_value(raw, new_data_type, &column, col_pos)?;
3796                    new_values.push(coerced);
3797                }
3798                table.schema_mut().columns[col_pos].ty = new_data_type;
3799                for (i, v) in new_values.into_iter().enumerate() {
3800                    let mut row_values = table
3801                        .rows()
3802                        .get(i)
3803                        .expect("bounds-checked above")
3804                        .values
3805                        .clone();
3806                    row_values[col_pos] = v;
3807                    table.update_row(i, row_values)?;
3808                }
3809            }
3810            spg_sql::ast::AlterTableTarget::AddTableConstraint(tc) => {
3811                // v7.14.0 — pg_dump emits PKs as a separate
3812                // ALTER TABLE ADD CONSTRAINT post-CREATE-TABLE.
3813                // For PRIMARY KEY / UNIQUE, install a UC entry
3814                // and the implicit BTree index on the leading
3815                // column. CHECK: append predicate to schema.
3816                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
3817                    EngineError::Storage(StorageError::TableNotFound {
3818                        name: s.name.into(),
3819                    })
3820                })?;
3821                let is_pk = matches!(tc, spg_sql::ast::TableConstraint::PrimaryKey { .. });
3822                match tc {
3823                    spg_sql::ast::TableConstraint::PrimaryKey { columns, .. }
3824                    | spg_sql::ast::TableConstraint::Unique { columns, .. } => {
3825                        let positions: Vec<usize> = columns
3826                            .iter()
3827                            .map(|c| {
3828                                table
3829                                    .schema()
3830                                    .columns
3831                                    .iter()
3832                                    .position(|sc| sc.name.eq_ignore_ascii_case(c))
3833                                    .ok_or_else(|| {
3834                                        EngineError::Unsupported(alloc::format!(
3835                                            "ALTER TABLE ADD CONSTRAINT: column {c:?} not found on {:?}",
3836                                            s.name
3837                                        ))
3838                                    })
3839                            })
3840                            .collect::<Result<Vec<_>, _>>()?;
3841                        // Skip if an equivalent UC is already there
3842                        // (idempotent — pg_dump's PK + a prior inline
3843                        // PK shouldn't double-install).
3844                        let already = table
3845                            .schema()
3846                            .uniqueness_constraints
3847                            .iter()
3848                            .any(|u| u.columns == positions);
3849                        if !already {
3850                            table.schema_mut().uniqueness_constraints.push(
3851                                spg_storage::UniquenessConstraint {
3852                                    is_primary_key: is_pk,
3853                                    columns: positions.clone(),
3854                                    nulls_not_distinct: false,
3855                                },
3856                            );
3857                            // PK implies NOT NULL on referenced cols.
3858                            if is_pk {
3859                                for p in &positions {
3860                                    if let Some(c) = table.schema_mut().columns.get_mut(*p) {
3861                                        c.nullable = false;
3862                                    }
3863                                }
3864                            }
3865                            // Add a BTree index on the leading
3866                            // column for INSERT-side enforcement.
3867                            let leading = &columns[0];
3868                            let already_idx = table.indices().iter().any(|idx| {
3869                                matches!(idx.kind, spg_storage::IndexKind::BTree(_))
3870                                    && table.schema().columns[idx.column_position].name == *leading
3871                            });
3872                            if !already_idx {
3873                                let suffix = if is_pk { "pkey" } else { "key" };
3874                                let idx_name = alloc::format!("{}_{leading}_{suffix}", s.name);
3875                                let _ = table.add_index(idx_name, leading);
3876                            }
3877                        }
3878                    }
3879                    spg_sql::ast::TableConstraint::Check { expr, .. } => {
3880                        table.schema_mut().checks.push(alloc::format!("{expr}"));
3881                    }
3882                    spg_sql::ast::TableConstraint::Index { name, columns } => {
3883                        // v7.15.0 — ALTER TABLE ADD KEY (cols).
3884                        // mysqldump occasionally emits this
3885                        // post-CREATE-TABLE shape; build a BTree
3886                        // on the leading column using the
3887                        // user-supplied or synthesised name.
3888                        let leading = &columns[0];
3889                        let already_idx = table.indices().iter().any(|idx| {
3890                            matches!(idx.kind, spg_storage::IndexKind::BTree(_))
3891                                && table.schema().columns[idx.column_position].name == *leading
3892                        });
3893                        if !already_idx {
3894                            let idx_name = name
3895                                .clone()
3896                                .unwrap_or_else(|| alloc::format!("{}_{leading}_idx", s.name));
3897                            let _ = table.add_index(idx_name, leading);
3898                        }
3899                    }
3900                }
3901            }
3902            spg_sql::ast::AlterTableTarget::DropColumn {
3903                column,
3904                if_exists,
3905                cascade,
3906            } => {
3907                // v7.13.3 — mailrs round-7 S8. Remove the column +
3908                // every row's value at that position; drop any index
3909                // on the column. RESTRICT (default) rejects when an
3910                // FK on this table or partial-index predicate
3911                // references the column; CASCADE removes those
3912                // dependents first.
3913                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
3914                    EngineError::Storage(StorageError::TableNotFound {
3915                        name: s.name.into(),
3916                    })
3917                })?;
3918                let col_pos = match table
3919                    .schema()
3920                    .columns
3921                    .iter()
3922                    .position(|c| c.name.eq_ignore_ascii_case(&column))
3923                {
3924                    Some(p) => p,
3925                    None => {
3926                        if if_exists {
3927                            return Ok(());
3928                        }
3929                        return Err(EngineError::Unsupported(alloc::format!(
3930                            "ALTER TABLE DROP COLUMN: column {column:?} not found on {:?}",
3931                            s.name
3932                        )));
3933                    }
3934                };
3935                // Dependent check: FKs whose local columns include
3936                // col_pos. CASCADE drops them; otherwise reject.
3937                let dependent_fks: Vec<usize> = table
3938                    .schema()
3939                    .foreign_keys
3940                    .iter()
3941                    .enumerate()
3942                    .filter_map(|(i, fk)| {
3943                        if fk.local_columns.contains(&col_pos) {
3944                            Some(i)
3945                        } else {
3946                            None
3947                        }
3948                    })
3949                    .collect();
3950                if !dependent_fks.is_empty() && !cascade {
3951                    return Err(EngineError::Unsupported(alloc::format!(
3952                        "ALTER TABLE DROP COLUMN {column:?}: column has FK dependents; \
3953                         use DROP COLUMN ... CASCADE to remove them"
3954                    )));
3955                }
3956                // CASCADE the FK removals first.
3957                if cascade {
3958                    // Drop in reverse so indices stay valid.
3959                    let mut sorted = dependent_fks.clone();
3960                    sorted.sort();
3961                    sorted.reverse();
3962                    let fks = &mut table.schema_mut().foreign_keys;
3963                    for i in sorted {
3964                        fks.remove(i);
3965                    }
3966                }
3967                // Drop the column. New helper on Table does the
3968                // row + schema + index shift atomically.
3969                table.drop_column(col_pos);
3970            }
3971            spg_sql::ast::AlterTableTarget::SetTriggerEnabled { which, enabled } => {
3972                // v7.16.1 — mailrs round-9 A.2.b. pg_dump
3973                // --disable-triggers wraps each table's data
3974                // block with `ALTER TABLE … DISABLE TRIGGER ALL`
3975                // / `… ENABLE TRIGGER ALL`. Toggle the enabled
3976                // flag on every matching trigger so the row-
3977                // write paths skip them; the catalog snapshot
3978                // persists the new state across restarts.
3979                let table_name = s.name.to_string();
3980                let trigs = self.active_catalog_mut().triggers_mut();
3981                let mut touched = false;
3982                for t in trigs.iter_mut() {
3983                    if !t.table.eq_ignore_ascii_case(&table_name) {
3984                        continue;
3985                    }
3986                    match &which {
3987                        spg_sql::ast::TriggerSelector::All => {
3988                            t.enabled = enabled;
3989                            touched = true;
3990                        }
3991                        spg_sql::ast::TriggerSelector::Named(name) => {
3992                            if t.name.eq_ignore_ascii_case(name) {
3993                                t.enabled = enabled;
3994                                touched = true;
3995                            }
3996                        }
3997                    }
3998                }
3999                // PG semantics: `ALL` on a table with no
4000                // triggers is a no-op (no error). A `Named`
4001                // form pointing at a non-existent trigger
4002                // raises in PG; v7.16.1 also raises so we
4003                // don't silently lose state.
4004                if !touched {
4005                    if let spg_sql::ast::TriggerSelector::Named(name) = &which {
4006                        return Err(EngineError::Unsupported(alloc::format!(
4007                            "ALTER TABLE {table_name:?} {} TRIGGER {name:?}: no such trigger on table",
4008                            if enabled { "ENABLE" } else { "DISABLE" },
4009                        )));
4010                    }
4011                }
4012            }
4013            spg_sql::ast::AlterTableTarget::RenameTable { new } => {
4014                // v7.16.2 — table-level rename (mailrs round-10
4015                // A.5 — used by migrate-042's `ALTER TABLE
4016                // contacts RENAME TO email_contacts`). Storage
4017                // helper updates the schema + by_name index +
4018                // dangling FK / trigger references in one
4019                // atomic step.
4020                let old = s.name.to_string();
4021                self.active_catalog_mut()
4022                    .rename_table(&old, &new)
4023                    .map_err(EngineError::Storage)?;
4024            }
4025            spg_sql::ast::AlterTableTarget::RenameColumn { old, new } => {
4026                // v7.15.0 — `ALTER TABLE t RENAME [COLUMN] old TO
4027                // new`. Rename the column in the schema; rewrite
4028                // every stored source string on this table that
4029                // references it as a (potentially-qualified)
4030                // column identifier: CHECK predicates, partial-
4031                // index predicates, runtime DEFAULT expressions.
4032                // Then walk catalog triggers on this table and
4033                // patch any `UPDATE OF` column list. Function and
4034                // trigger bodies are NOT auto-rewritten — that
4035                // surface is dynamic SQL territory; users update
4036                // those separately (matches PG plpgsql behavior:
4037                // a column rename invalidates name-referencing
4038                // plpgsql at call time, not rename time).
4039                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
4040                    EngineError::Storage(StorageError::TableNotFound {
4041                        name: s.name.into(),
4042                    })
4043                })?;
4044                let col_pos = table
4045                    .schema()
4046                    .columns
4047                    .iter()
4048                    .position(|c| c.name.eq_ignore_ascii_case(&old))
4049                    .ok_or_else(|| {
4050                        EngineError::Unsupported(alloc::format!(
4051                            "ALTER TABLE RENAME COLUMN: column {old:?} not found on {:?}",
4052                            s.name
4053                        ))
4054                    })?;
4055                // Reject same-name (case-insensitive) collision.
4056                if table
4057                    .schema()
4058                    .columns
4059                    .iter()
4060                    .enumerate()
4061                    .any(|(i, c)| i != col_pos && c.name.eq_ignore_ascii_case(&new))
4062                {
4063                    return Err(EngineError::Unsupported(alloc::format!(
4064                        "ALTER TABLE RENAME COLUMN: column {new:?} already exists on {:?}",
4065                        s.name
4066                    )));
4067                }
4068                // Schema rename first — even idempotent same-name
4069                // rename (`ALTER TABLE t RENAME a TO a`) needs to
4070                // be a no-op, not an error.
4071                if old.eq_ignore_ascii_case(&new) {
4072                    return Ok(());
4073                }
4074                table.rename_column(col_pos, &new);
4075                // Rewrite per-column runtime_default sources on
4076                // every column of this table — a DEFAULT expression
4077                // on column X may reference column Y by name (rare,
4078                // but legal in PG when the value is supplied via a
4079                // function that takes the row).
4080                let n_cols = table.schema().columns.len();
4081                for i in 0..n_cols {
4082                    let rt = table.schema().columns[i].runtime_default.clone();
4083                    if let Some(src) = rt {
4084                        let rewritten = rewrite_column_in_source(&src, &old, &new)?;
4085                        table.schema_mut().columns[i].runtime_default = Some(rewritten);
4086                    }
4087                }
4088                // Rewrite table-level CHECK predicates.
4089                let checks = table.schema().checks.clone();
4090                let mut new_checks = Vec::with_capacity(checks.len());
4091                for chk in checks {
4092                    new_checks.push(rewrite_column_in_source(&chk, &old, &new)?);
4093                }
4094                table.schema_mut().checks = new_checks;
4095                // Rewrite per-index partial_predicate sources.
4096                let n_idx = table.indices().len();
4097                for i in 0..n_idx {
4098                    let pred = table.indices()[i].partial_predicate.clone();
4099                    if let Some(src) = pred {
4100                        let rewritten = rewrite_column_in_source(&src, &old, &new)?;
4101                        // SAFETY: indices_mut would be cleanest, but
4102                        // partial_predicate is the only mutable field
4103                        // here; reach in via the public mut accessor.
4104                        table.set_partial_predicate(i, Some(rewritten));
4105                    }
4106                }
4107                // Walk catalog triggers; patch `update_columns` on
4108                // triggers attached to this table.
4109                let table_name = s.name.to_string();
4110                for trig in self.active_catalog_mut().triggers_mut() {
4111                    if !trig.table.eq_ignore_ascii_case(&table_name) {
4112                        continue;
4113                    }
4114                    for c in &mut trig.update_columns {
4115                        if c.eq_ignore_ascii_case(&old) {
4116                            *c = new.clone();
4117                        }
4118                    }
4119                }
4120            }
4121        }
4122        Ok(())
4123    }
4124
4125    fn exec_alter_index(
4126        &mut self,
4127        stmt: spg_sql::ast::AlterIndexStatement,
4128    ) -> Result<QueryResult, EngineError> {
4129        // Translate the optional SQL-side encoding choice into the
4130        // storage-side enum; the same SqlVecEncoding -> VecEncoding
4131        // bridge `column_type_to_data_type` uses.
4132        let spg_sql::ast::AlterIndexStatement {
4133            name: idx_name,
4134            target,
4135        } = stmt;
4136        // v7.16.2 — RENAME TO branch (mailrs round-10 migrate-042).
4137        // IF EXISTS makes a missing index a no-op rather than an
4138        // error, mirroring PG semantics.
4139        if let spg_sql::ast::AlterIndexTarget::Rename { new, if_exists } = target {
4140            let renamed = self.active_catalog_mut().rename_index(&idx_name, &new);
4141            return match renamed {
4142                Ok(()) => Ok(QueryResult::CommandOk {
4143                    affected: 0,
4144                    modified_catalog: !self.in_transaction(),
4145                }),
4146                Err(StorageError::IndexNotFound { .. }) if if_exists => {
4147                    Ok(QueryResult::CommandOk {
4148                        affected: 0,
4149                        modified_catalog: false,
4150                    })
4151                }
4152                Err(e) => Err(EngineError::Storage(e)),
4153            };
4154        }
4155        let spg_sql::ast::AlterIndexTarget::Rebuild { encoding } = target else {
4156            unreachable!("Rename branch returned above");
4157        };
4158        let target = encoding.map(|e| match e {
4159            SqlVecEncoding::F32 => VecEncoding::F32,
4160            SqlVecEncoding::Sq8 => VecEncoding::Sq8,
4161            SqlVecEncoding::F16 => VecEncoding::F16,
4162        });
4163        // Linear scan: index names are globally unique within a
4164        // catalog (enforced by add_nsw_index_inner) so the first
4165        // match is the only one. Save the table name to avoid
4166        // borrowing while we then take a mut borrow.
4167        let table_name = {
4168            let cat = self.active_catalog();
4169            let mut found: Option<String> = None;
4170            for tname in cat.table_names() {
4171                if let Some(t) = cat.get(&tname)
4172                    && t.indices().iter().any(|i| i.name == idx_name)
4173                {
4174                    found = Some(tname);
4175                    break;
4176                }
4177            }
4178            found.ok_or_else(|| {
4179                EngineError::Storage(StorageError::IndexNotFound {
4180                    name: idx_name.clone(),
4181                })
4182            })?
4183        };
4184        let table = self
4185            .active_catalog_mut()
4186            .get_mut(&table_name)
4187            .expect("table found above");
4188        table.rebuild_nsw_index(&idx_name, target)?;
4189        // v6.3.1 — ALTER INDEX REBUILD potentially with new encoding
4190        // changes cost characteristics; evict any cached plans.
4191        self.plan_cache.evict_referencing(&table_name);
4192        Ok(QueryResult::CommandOk {
4193            affected: 0,
4194            modified_catalog: !self.in_transaction(),
4195        })
4196    }
4197
4198    fn exec_create_index(
4199        &mut self,
4200        stmt: CreateIndexStatement,
4201    ) -> Result<QueryResult, EngineError> {
4202        let table = self
4203            .active_catalog_mut()
4204            .get_mut(&stmt.table)
4205            .ok_or_else(|| {
4206                EngineError::Storage(StorageError::TableNotFound {
4207                    name: stmt.table.clone(),
4208                })
4209            })?;
4210        // `IF NOT EXISTS` reduces DuplicateIndex to a no-op CommandOk.
4211        if stmt.if_not_exists && table.indices().iter().any(|i| i.name == stmt.name) {
4212            return Ok(QueryResult::CommandOk {
4213                affected: 0,
4214                modified_catalog: false,
4215            });
4216        }
4217        // v7.9.14 — multi-column index parses through; engine
4218        // builds a single-column BTree on the leading column only.
4219        // The extras live on the AST so spg-server's dispatcher
4220        // can emit a PG-wire NoticeResponse / log line. Composite
4221        // BTree keys land in v7.10.
4222        let _ = &stmt.extra_columns; // intentional drop on engine side
4223        let table_name = stmt.table.clone();
4224        // v6.8.0 — resolve INCLUDE column names to positions. Done
4225        // before `add_index` so a typo error surfaces before any
4226        // catalog mutation lands.
4227        let included_positions: Vec<usize> = if stmt.included_columns.is_empty() {
4228            Vec::new()
4229        } else {
4230            let schema = table.schema();
4231            stmt.included_columns
4232                .iter()
4233                .map(|c| {
4234                    schema.column_position(c).ok_or_else(|| {
4235                        EngineError::Storage(StorageError::ColumnNotFound { column: c.clone() })
4236                    })
4237                })
4238                .collect::<Result<Vec<_>, _>>()?
4239        };
4240        match stmt.method {
4241            IndexMethod::BTree => table.add_index(stmt.name.clone(), &stmt.column)?,
4242            IndexMethod::Hnsw => {
4243                if !included_positions.is_empty() {
4244                    return Err(EngineError::Unsupported(
4245                        "INCLUDE columns are not supported on HNSW indexes".into(),
4246                    ));
4247                }
4248                table.add_nsw_index(stmt.name.clone(), &stmt.column, spg_storage::NSW_DEFAULT_M)?;
4249            }
4250            // v6.7.1 — BRIN. Pure metadata; no in-memory data.
4251            IndexMethod::Brin => {
4252                if !included_positions.is_empty() {
4253                    return Err(EngineError::Unsupported(
4254                        "INCLUDE columns are not supported on BRIN indexes".into(),
4255                    ));
4256                }
4257                table.add_brin_index(stmt.name.clone(), &stmt.column)?;
4258            }
4259            // v7.12.3 — GIN inverted index. Real posting-list-backed
4260            // GIN when the indexed column is `tsvector`; falls back
4261            // to a BTree on the leading column for any other column
4262            // type so v7.9.26b's `pg_dump` compatibility (GIN on
4263            // JSONB etc. silently loading as BTree) is preserved.
4264            // Operators see the real GIN only where it matters; old
4265            // schemas keep loading.
4266            IndexMethod::Gin => {
4267                if !included_positions.is_empty() {
4268                    return Err(EngineError::Unsupported(
4269                        "INCLUDE columns are not supported on GIN indexes".into(),
4270                    ));
4271                }
4272                let col_pos = table
4273                    .schema()
4274                    .column_position(&stmt.column)
4275                    .ok_or_else(|| {
4276                        EngineError::Storage(StorageError::ColumnNotFound {
4277                            column: stmt.column.clone(),
4278                        })
4279                    })?;
4280                let col_ty = table.schema().columns[col_pos].ty;
4281                // v7.15.0 — `gin_trgm_ops` on a TEXT/VARCHAR
4282                // column dispatches to the real trigram-shingle
4283                // GIN build (LIKE / similarity acceleration).
4284                // Other GIN opclasses fall through to the regular
4285                // tsvector-vs-BTree split below.
4286                let is_trgm = stmt
4287                    .opclass
4288                    .as_deref()
4289                    .is_some_and(|op| op.eq_ignore_ascii_case("gin_trgm_ops"));
4290                if is_trgm
4291                    && matches!(
4292                        col_ty,
4293                        spg_storage::DataType::Text | spg_storage::DataType::Varchar(_)
4294                    )
4295                {
4296                    table
4297                        .add_gin_trgm_index(stmt.name.clone(), &stmt.column)
4298                        .map_err(EngineError::Storage)?;
4299                } else if col_ty == spg_storage::DataType::TsVector {
4300                    table
4301                        .add_gin_index(stmt.name.clone(), &stmt.column)
4302                        .map_err(EngineError::Storage)?;
4303                } else {
4304                    // v7.9.26b BTree fallback — the catalog still
4305                    // gets an index entry on the leading column so
4306                    // pg_dump scripts that name GIN on JSONB / etc.
4307                    // load clean; query-time gain stays opt-in for
4308                    // tsvector callers.
4309                    table.add_index(stmt.name.clone(), &stmt.column)?;
4310                }
4311            }
4312        }
4313        if !included_positions.is_empty()
4314            && let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name)
4315        {
4316            idx.included_columns = included_positions;
4317        }
4318        // v6.8.1 — persist partial-index predicate. Stored as the
4319        // expression's Display form so the catalog snapshot stays
4320        // pure (storage has no spg-sql dependency). The runtime
4321        // maintenance path treats partial indexes identically to
4322        // full indexes for v6.8.1 (over-maintenance is safe; the
4323        // planner-side "use partial when query WHERE implies the
4324        // predicate" pass is STABILITY carve-out).
4325        if let Some(pred_expr) = &stmt.partial_predicate {
4326            let canonical = pred_expr.to_string();
4327            // v7.13.2 — mailrs round-6 S2. PG's `pg_trgm` uses
4328            // `CREATE INDEX … USING gin(col gin_trgm_ops) WHERE …`
4329            // routinely to slim trigram indexes. SPG now persists
4330            // the predicate for GIN / BRIN / HNSW the same way it
4331            // already does for BTree — same v6.8.1 "over-maintain
4332            // is safe; planner-side partial routing is STABILITY
4333            // carve-out" semantics. HNSW carries an additional
4334            // caveat: the predicate isn't applied at index build
4335            // time (would require per-row eval inside the NSW
4336            // construction loop), so the index oversamples; query
4337            // time the WHERE clause still filters correctly.
4338            if let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name) {
4339                idx.partial_predicate = Some(canonical);
4340            }
4341        }
4342        // v6.8.2 — persist expression index key. Same Display-form
4343        // storage; the runtime maintenance pass evaluates each
4344        // row's expression to derive the index key, but for v6.8.2
4345        // the engine falls through to the bare-column-reference
4346        // path and the expression is preserved for format-layer
4347        // round-trip + future planner work. Carved-out in
4348        // STABILITY § "Out of v6.8".
4349        if let Some(key_expr) = &stmt.expression {
4350            if matches!(
4351                stmt.method,
4352                IndexMethod::Hnsw | IndexMethod::Brin | IndexMethod::Gin
4353            ) {
4354                return Err(EngineError::Unsupported(
4355                    "Expression keys are not supported on HNSW or BRIN indexes".into(),
4356                ));
4357            }
4358            let canonical = key_expr.to_string();
4359            if let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name) {
4360                idx.expression = Some(canonical);
4361            }
4362        }
4363        // v7.9.29 — persist `is_unique` flag on the storage Index.
4364        // Combined with `partial_predicate`, INSERT enforcement
4365        // checks that no other row whose predicate evaluates true
4366        // shares the same indexed key. Parser already rejected
4367        // `UNIQUE` on HNSW / BRIN, so plain BTree here.
4368        // For multi-column UNIQUE INDEX the extras matter (the
4369        // full tuple is the uniqueness key), so resolve them to
4370        // column positions and persist on the index too.
4371        if stmt.is_unique {
4372            let mut extra_positions: alloc::vec::Vec<usize> = alloc::vec::Vec::new();
4373            for col_name in &stmt.extra_columns {
4374                let pos = table
4375                    .schema()
4376                    .columns
4377                    .iter()
4378                    .position(|c| c.name.eq_ignore_ascii_case(col_name))
4379                    .ok_or_else(|| {
4380                        EngineError::Unsupported(alloc::format!(
4381                            "UNIQUE INDEX {:?}: extra column {col_name:?} not in table {:?}",
4382                            stmt.name,
4383                            stmt.table
4384                        ))
4385                    })?;
4386                extra_positions.push(pos);
4387            }
4388            if let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name) {
4389                idx.is_unique = true;
4390                idx.extra_column_positions = extra_positions;
4391            }
4392            // At index-creation time, check the existing rows for
4393            // pre-existing duplicates that would have violated the
4394            // new constraint — otherwise CREATE UNIQUE INDEX would
4395            // silently leave duplicates in place.
4396            let snapshot_indices = table.indices().to_vec();
4397            let snapshot_rows: alloc::vec::Vec<spg_storage::Row> =
4398                table.rows().iter().cloned().collect();
4399            let snapshot_schema = table.schema().clone();
4400            let idx_ref = snapshot_indices
4401                .iter()
4402                .find(|i| i.name == stmt.name)
4403                .expect("just-added index");
4404            check_existing_unique_violation(idx_ref, &snapshot_schema, &snapshot_rows)?;
4405        }
4406        // v6.3.1 — adding an index can change the optimal plan for
4407        // any cached query that references this table.
4408        self.plan_cache.evict_referencing(&table_name);
4409        Ok(QueryResult::CommandOk {
4410            affected: 0,
4411            modified_catalog: !self.in_transaction(),
4412        })
4413    }
4414
4415    /// v7.13.3 — mailrs round-7 S9. SPG-specific reconciliation
4416    /// for `CREATE TABLE IF NOT EXISTS` when the table already
4417    /// exists. Adds missing columns + inline FKs from the new
4418    /// definition; existing columns / constraints stay untouched.
4419    /// New columns with a `NOT NULL` declaration without a
4420    /// `DEFAULT` are reported as a clear error rather than
4421    /// silently dropped — this is the "fail loud on real
4422    /// incompatibility, fail silent on schema-superset" tradeoff.
4423    fn reconcile_table_if_not_exists(
4424        &mut self,
4425        stmt: CreateTableStatement,
4426    ) -> Result<QueryResult, EngineError> {
4427        let table_name = stmt.name.clone();
4428        let clock = self.clock;
4429        let existing_col_names: alloc::collections::BTreeSet<String> = self
4430            .active_catalog()
4431            .get(&table_name)
4432            .expect("checked above")
4433            .schema()
4434            .columns
4435            .iter()
4436            .map(|c| c.name.to_ascii_lowercase())
4437            .collect();
4438        let row_count = self
4439            .active_catalog()
4440            .get(&table_name)
4441            .expect("checked above")
4442            .row_count();
4443        // Collect missing column defs in source order.
4444        let new_columns: alloc::vec::Vec<spg_sql::ast::ColumnDef> = stmt
4445            .columns
4446            .iter()
4447            .filter(|c| !existing_col_names.contains(&c.name.to_ascii_lowercase()))
4448            .cloned()
4449            .collect();
4450        for col_def in new_columns {
4451            let col_name = col_def.name.clone();
4452            let nullable = col_def.nullable;
4453            let has_default = col_def.default.is_some() || col_def.auto_increment;
4454            let col_schema = column_def_to_schema(col_def)?;
4455            let fill_value: Value = if has_default || col_schema.runtime_default.is_some() {
4456                resolve_column_default_free(&col_schema, clock)?
4457            } else if nullable || row_count == 0 {
4458                Value::Null
4459            } else {
4460                return Err(EngineError::Unsupported(alloc::format!(
4461                    "CREATE TABLE IF NOT EXISTS {table_name:?}: reconciling \
4462                     column {col_name:?} requires DEFAULT (existing rows would violate NOT NULL)"
4463                )));
4464            };
4465            let table = self
4466                .active_catalog_mut()
4467                .get_mut(&table_name)
4468                .expect("checked above");
4469            table.add_column(col_schema, fill_value);
4470        }
4471        // Resolve any newly-added inline FKs (column-level
4472        // REFERENCES forms) and install. Skip FKs whose local
4473        // columns we didn't have in the existing table.
4474        let table_cols_now = self
4475            .active_catalog()
4476            .get(&table_name)
4477            .expect("checked above")
4478            .schema()
4479            .columns
4480            .clone();
4481        for fk in stmt.foreign_keys {
4482            // Only install FKs whose every local column resolves
4483            // — older catalogs may have a column the new FK
4484            // references but not the column the new FK declares.
4485            let all_resolved = fk.columns.iter().all(|c| {
4486                table_cols_now
4487                    .iter()
4488                    .any(|sc| sc.name.eq_ignore_ascii_case(c))
4489            });
4490            if !all_resolved {
4491                continue;
4492            }
4493            let already_present = {
4494                let table = self
4495                    .active_catalog()
4496                    .get(&table_name)
4497                    .expect("checked above");
4498                table.schema().foreign_keys.iter().any(|f| {
4499                    f.parent_table.eq_ignore_ascii_case(&fk.parent_table)
4500                        && f.local_columns.len() == fk.columns.len()
4501                })
4502            };
4503            if already_present {
4504                continue;
4505            }
4506            let storage_fk =
4507                resolve_foreign_key(&table_name, &table_cols_now, fk, self.active_catalog())?;
4508            let table = self
4509                .active_catalog_mut()
4510                .get_mut(&table_name)
4511                .expect("checked above");
4512            table.schema_mut().foreign_keys.push(storage_fk);
4513        }
4514        Ok(QueryResult::CommandOk {
4515            affected: 0,
4516            modified_catalog: !self.in_transaction(),
4517        })
4518    }
4519
4520    /// v7.14.0 — DROP TABLE handler (pg_dump / mysqldump preamble).
4521    fn exec_drop_table(
4522        &mut self,
4523        names: Vec<String>,
4524        if_exists: bool,
4525    ) -> Result<QueryResult, EngineError> {
4526        for name in names {
4527            let dropped = self.active_catalog_mut().drop_table(&name);
4528            if !dropped && !if_exists {
4529                return Err(EngineError::Storage(StorageError::TableNotFound { name }));
4530            }
4531        }
4532        Ok(QueryResult::CommandOk {
4533            affected: 0,
4534            modified_catalog: !self.in_transaction(),
4535        })
4536    }
4537
4538    /// v7.14.0 — DROP INDEX handler.
4539    fn exec_drop_index(
4540        &mut self,
4541        name: String,
4542        if_exists: bool,
4543    ) -> Result<QueryResult, EngineError> {
4544        let dropped = self.active_catalog_mut().drop_named_index(&name);
4545        if !dropped && !if_exists {
4546            return Err(EngineError::Storage(StorageError::IndexNotFound { name }));
4547        }
4548        Ok(QueryResult::CommandOk {
4549            affected: 0,
4550            modified_catalog: !self.in_transaction(),
4551        })
4552    }
4553
4554    fn exec_create_table(
4555        &mut self,
4556        stmt: CreateTableStatement,
4557    ) -> Result<QueryResult, EngineError> {
4558        if stmt.if_not_exists && self.active_catalog().get(&stmt.name).is_some() {
4559            // v7.16.2 — PG-strict silent no-op (mailrs round-10
4560            // surfaced this). v7.13.3's "reconcile by adding
4561            // missing columns" was friendly for mailrs round-7
4562            // where init-schema's `contacts` and migrate-023's
4563            // CardDAV `contacts` collided; but it ALSO silently
4564            // added columns to existing tables when later
4565            // migrations had a duplicate `CREATE TABLE IF NOT
4566            // EXISTS <t> (different-shape-cols)` shape. mailrs's
4567            // migrate-030 has exactly that — re-declares
4568            // system_config with `key` even though init-schema
4569            // already created it with `config_key`. PG's silent
4570            // no-op leaves system_config at `config_key`;
4571            // v7.13.3 added a phantom `key` column that then
4572            // tripped migrate-040's idempotent rename guard.
4573            // mailrs v1.7.106 ships the proper PG-style
4574            // contacts rename via DO + IF EXISTS, so SPG can
4575            // revert to PG-strict here without re-breaking the
4576            // round-7 case.
4577            return Ok(QueryResult::CommandOk {
4578                affected: 0,
4579                modified_catalog: false,
4580            });
4581        }
4582        let table_name = stmt.name.clone();
4583        // v7.9.13 — pluck the names of any columns marked
4584        // `PRIMARY KEY` inline so the post-create-table pass can
4585        // build an implicit BTree index. mailrs F1.
4586        let inline_pk_columns: Vec<String> = stmt
4587            .columns
4588            .iter()
4589            .filter(|c| c.is_primary_key)
4590            .map(|c| c.name.clone())
4591            .collect();
4592        // v7.9.19 — table-level constraints: PRIMARY KEY (a, b, ...)
4593        // and UNIQUE (a, b, ...). Each builds a BTree index on the
4594        // leading column (the existing single-column storage tier)
4595        // and registers a UniquenessConstraint on the schema for
4596        // INSERT-time enforcement of the full tuple. mailrs G1/G6.
4597        let cols = stmt
4598            .columns
4599            .into_iter()
4600            .map(column_def_to_schema)
4601            .collect::<Result<Vec<_>, _>>()?;
4602        // Composite NOT-NULL implication for PRIMARY KEY columns.
4603        let mut cols = cols;
4604        for tc in &stmt.table_constraints {
4605            if let spg_sql::ast::TableConstraint::PrimaryKey { columns, .. } = tc {
4606                for col_name in columns {
4607                    if let Some(col) = cols.iter_mut().find(|c| c.name == *col_name) {
4608                        col.nullable = false;
4609                    }
4610                }
4611            }
4612        }
4613        // v7.6.1 — resolve every FK in the statement against the
4614        // already-known catalog. Validates: parent table exists,
4615        // parent column names exist, arity matches, parent columns
4616        // have a PK / UNIQUE index. Self-referencing FKs (parent
4617        // table == this table) resolve against the column list we
4618        // just built — they don't need the catalog yet.
4619        let mut fks: Vec<spg_storage::ForeignKeyConstraint> =
4620            Vec::with_capacity(stmt.foreign_keys.len());
4621        for fk in stmt.foreign_keys {
4622            // v7.14.0 — when SET FOREIGN_KEY_CHECKS=0 is in effect
4623            // (mysqldump preamble + bulk imports), defer FK
4624            // resolution if the parent table isn't in the catalog
4625            // yet. The FK is queued and resolved when checks flip
4626            // back on. Self-references stay in-band (the parent is
4627            // the same as the child we're building).
4628            let needs_parent = !fk.parent_table.eq_ignore_ascii_case(&table_name);
4629            if !self.foreign_key_checks
4630                && needs_parent
4631                && self.active_catalog().get(&fk.parent_table).is_none()
4632            {
4633                self.pending_foreign_keys.push((table_name.clone(), fk));
4634                continue;
4635            }
4636            fks.push(resolve_foreign_key(
4637                &table_name,
4638                &cols,
4639                fk,
4640                self.active_catalog(),
4641            )?);
4642        }
4643        let mut schema = TableSchema::new(table_name.clone(), cols);
4644        schema.foreign_keys = fks;
4645        // v7.9.19 — translate AST table_constraints to storage
4646        // UniquenessConstraints (column name → position) so the
4647        // INSERT enforcement helper sees positions directly.
4648        let mut uc_storage: Vec<spg_storage::UniquenessConstraint> = Vec::new();
4649        let mut check_exprs: Vec<String> = Vec::new();
4650        for tc in &stmt.table_constraints {
4651            let (is_pk, names, nnd) = match tc {
4652                spg_sql::ast::TableConstraint::PrimaryKey { columns, .. } => {
4653                    (true, columns.clone(), false)
4654                }
4655                spg_sql::ast::TableConstraint::Unique {
4656                    columns,
4657                    nulls_not_distinct,
4658                    ..
4659                } => (false, columns.clone(), *nulls_not_distinct),
4660                spg_sql::ast::TableConstraint::Check { expr, .. } => {
4661                    // v7.13.0 — collect CHECK predicate sources;
4662                    // they get attached to the schema below.
4663                    check_exprs.push(alloc::format!("{expr}"));
4664                    continue;
4665                }
4666                // v7.15.0 — plain `KEY (cols)` from MySQL inline
4667                // is NOT a uniqueness constraint; skip the UC
4668                // build path entirely. The BTree index lands in
4669                // the post-create loop below alongside the PK/UQ
4670                // implicit indexes.
4671                spg_sql::ast::TableConstraint::Index { .. } => continue,
4672            };
4673            let mut positions = Vec::with_capacity(names.len());
4674            for n in &names {
4675                let pos = schema
4676                    .columns
4677                    .iter()
4678                    .position(|c| c.name == *n)
4679                    .ok_or_else(|| {
4680                        EngineError::Unsupported(alloc::format!(
4681                            "table constraint references unknown column {n:?}"
4682                        ))
4683                    })?;
4684                positions.push(pos);
4685            }
4686            uc_storage.push(spg_storage::UniquenessConstraint {
4687                is_primary_key: is_pk,
4688                columns: positions,
4689                nulls_not_distinct: nnd,
4690            });
4691        }
4692        schema.uniqueness_constraints = uc_storage.clone();
4693        schema.checks = check_exprs;
4694        self.active_catalog_mut().create_table(schema)?;
4695        // v7.9.13 — implicit BTree per inline PK column +
4696        // v7.9.19 — implicit BTree on the leading column of every
4697        // table-level PRIMARY KEY / UNIQUE constraint.
4698        let table = self
4699            .active_catalog_mut()
4700            .get_mut(&table_name)
4701            .expect("just created");
4702        for (i, col_name) in inline_pk_columns.iter().enumerate() {
4703            let idx_name = if inline_pk_columns.len() == 1 {
4704                alloc::format!("{table_name}_pkey")
4705            } else {
4706                alloc::format!("{table_name}_pkey_{i}")
4707            };
4708            if let Err(e) = table.add_index(idx_name, col_name) {
4709                return Err(EngineError::Storage(e));
4710            }
4711        }
4712        for (i, tc) in stmt.table_constraints.iter().enumerate() {
4713            // v7.15.0 — plain KEY/INDEX rides this same loop so
4714            // the implicit BTree gets built. It carries its own
4715            // user-supplied name; PK/UQ still synthesise.
4716            let (suffix, names, explicit_name): (&str, &Vec<String>, Option<&String>) = match tc {
4717                spg_sql::ast::TableConstraint::PrimaryKey { columns, .. } => {
4718                    ("pkey", columns, None)
4719                }
4720                spg_sql::ast::TableConstraint::Unique { columns, .. } => ("key", columns, None),
4721                spg_sql::ast::TableConstraint::Index { name, columns } => {
4722                    ("idx", columns, name.as_ref())
4723                }
4724                spg_sql::ast::TableConstraint::Check { .. } => continue,
4725            };
4726            let leading = &names[0];
4727            // Skip if a same-column BTree already exists (e.g.
4728            // inline PK on the leading column).
4729            let already = table.indices().iter().any(|idx| {
4730                matches!(idx.kind, spg_storage::IndexKind::BTree(_))
4731                    && table.schema().columns[idx.column_position].name == *leading
4732            });
4733            if already {
4734                continue;
4735            }
4736            let idx_name = if let Some(n) = explicit_name {
4737                n.clone()
4738            } else if names.len() == 1 {
4739                alloc::format!("{table_name}_{leading}_{suffix}")
4740            } else {
4741                alloc::format!("{table_name}_{leading}_{suffix}_{i}")
4742            };
4743            if let Err(e) = table.add_index(idx_name, leading) {
4744                return Err(EngineError::Storage(e));
4745            }
4746        }
4747        Ok(QueryResult::CommandOk {
4748            affected: 0,
4749            modified_catalog: !self.in_transaction(),
4750        })
4751    }
4752
4753    fn exec_insert(&mut self, stmt: InsertStatement) -> Result<QueryResult, EngineError> {
4754        // v7.13.0 — `INSERT INTO t [(cols)] SELECT …` (mailrs
4755        // round-5 G4). Execute the inner SELECT first, then route
4756        // back through the regular VALUES code path with the
4757        // materialised rows.
4758        if let Some(select) = stmt.select_source.clone() {
4759            let select_result = self.exec_select_cancel(&select, CancelToken::none())?;
4760            let rows = match select_result {
4761                QueryResult::Rows { rows, .. } => rows,
4762                other => {
4763                    return Err(EngineError::Unsupported(alloc::format!(
4764                        "INSERT … SELECT: inner statement produced {other:?} instead of a row set"
4765                    )));
4766                }
4767            };
4768            let mut materialised: Vec<Vec<Expr>> = Vec::with_capacity(rows.len());
4769            for row in rows {
4770                let mut tuple: Vec<Expr> = Vec::with_capacity(row.values.len());
4771                for v in row.values {
4772                    tuple.push(value_to_literal_expr_permissive(v)?);
4773                }
4774                materialised.push(tuple);
4775            }
4776            let recurse = InsertStatement {
4777                table: stmt.table,
4778                columns: stmt.columns,
4779                rows: materialised,
4780                select_source: None,
4781                on_conflict: stmt.on_conflict,
4782                returning: stmt.returning,
4783            };
4784            return self.exec_insert(recurse);
4785        }
4786        // v7.9.21 — snapshot the clock fn pointer before the mut
4787        // borrow on the catalog opens; runtime DEFAULT eval needs
4788        // it inside the row hot loop.
4789        let clock = self.clock;
4790        // v7.12.4 — snapshot row-level triggers + their referenced
4791        // functions before the mut borrow on the catalog opens.
4792        // Cloned out so the row hot loop can fire them without
4793        // re-borrowing the catalog (which would conflict with
4794        // table.insert's mutable borrow).
4795        let before_insert_triggers = self.snapshot_row_triggers(&stmt.table, "INSERT", "BEFORE");
4796        let after_insert_triggers = self.snapshot_row_triggers(&stmt.table, "INSERT", "AFTER");
4797        let trigger_session_cfg: Option<alloc::string::String> = self
4798            .session_params
4799            .get("default_text_search_config")
4800            .cloned();
4801        let table = self
4802            .active_catalog_mut()
4803            .get_mut(&stmt.table)
4804            .ok_or_else(|| {
4805                EngineError::Storage(StorageError::TableNotFound {
4806                    name: stmt.table.clone(),
4807                })
4808            })?;
4809        // v3.1.5: clone the columns vector only (not the whole
4810        // TableSchema — saves one String alloc for the table name).
4811        // We need an owned snapshot because we'll call `table.insert`
4812        // (mutable borrow on `table`) inside the row loop while
4813        // reading schema fields.
4814        let column_meta: Vec<ColumnSchema> = table.schema().columns.clone();
4815        let schema_cols_len = column_meta.len();
4816        // Build a permutation `tuple_pos[c] = Some(j)` meaning schema
4817        // column `c` is filled from the `j`-th tuple slot; `None` means
4818        // "fill with NULL". Validated once and reused for every row.
4819        let tuple_pos: Option<Vec<Option<usize>>> = match &stmt.columns {
4820            None => None, // 1-1 mapping, fast path
4821            Some(cols) => {
4822                let mut map = alloc::vec![None; schema_cols_len];
4823                for (j, name) in cols.iter().enumerate() {
4824                    let idx = column_meta
4825                        .iter()
4826                        .position(|c| c.name == *name)
4827                        .ok_or_else(|| {
4828                            EngineError::Eval(EvalError::ColumnNotFound { name: name.clone() })
4829                        })?;
4830                    if map[idx].is_some() {
4831                        return Err(EngineError::Storage(StorageError::ArityMismatch {
4832                            expected: schema_cols_len,
4833                            actual: cols.len(),
4834                        }));
4835                    }
4836                    map[idx] = Some(j);
4837                }
4838                // Omitted columns must either be nullable, carry a
4839                // DEFAULT, or be AUTO_INCREMENT. Catch NOT NULL
4840                // omissions up front so the WAL stays clean.
4841                for (i, col) in column_meta.iter().enumerate() {
4842                    if map[i].is_none()
4843                        && !col.nullable
4844                        && col.default.is_none()
4845                        && col.runtime_default.is_none()
4846                        && !col.auto_increment
4847                    {
4848                        return Err(EngineError::Storage(StorageError::NullInNotNull {
4849                            column: col.name.clone(),
4850                        }));
4851                    }
4852                }
4853                Some(map)
4854            }
4855        };
4856        let expected_tuple_len = stmt.columns.as_ref().map_or(schema_cols_len, Vec::len);
4857        // v7.6.2 — snapshot this table's FK list before the
4858        // mutable-borrow window so we can run parent lookups
4859        // against the immutable catalog after parsing. Empty vec is
4860        // the no-FK fast path; clone cost is O(fks * arity) which
4861        // is < 100 ns for typical schemas.
4862        let fks = table.schema().foreign_keys.clone();
4863        let mut affected = 0usize;
4864        // Stage 1 — parse + AUTO_INC + coerce all rows under the
4865        // single mutable borrow.
4866        let mut all_values: Vec<Vec<Value>> = Vec::with_capacity(stmt.rows.len());
4867        for tuple in stmt.rows {
4868            if tuple.len() != expected_tuple_len {
4869                return Err(EngineError::Storage(StorageError::ArityMismatch {
4870                    expected: expected_tuple_len,
4871                    actual: tuple.len(),
4872                }));
4873            }
4874            // Fast path: no column-list permutation → tuple slot j
4875            // maps to schema column j. We can zip schema with tuple
4876            // and skip the `raw_tuple` staging allocation entirely.
4877            let values: Vec<Value> = if let Some(map) = &tuple_pos {
4878                // Permuted path: still need raw_tuple to index by `map[i]`.
4879                let raw_tuple: Vec<Value> = tuple
4880                    .into_iter()
4881                    .map(literal_expr_to_value)
4882                    .collect::<Result<_, _>>()?;
4883                let mut out = Vec::with_capacity(schema_cols_len);
4884                for (i, col) in column_meta.iter().enumerate() {
4885                    let mut raw = match map[i] {
4886                        Some(j) => raw_tuple[j].clone(),
4887                        None => resolve_column_default_free(col, clock)?,
4888                    };
4889                    if col.auto_increment && raw.is_null() {
4890                        let next = table.next_auto_value(i).ok_or_else(|| {
4891                            EngineError::Unsupported(alloc::format!(
4892                                "AUTO_INCREMENT applies to integer columns only (column `{}`)",
4893                                col.name
4894                            ))
4895                        })?;
4896                        raw = Value::BigInt(next);
4897                    }
4898                    out.push(coerce_value(raw, col.ty, &col.name, i)?);
4899                }
4900                out
4901            } else {
4902                // 1-1 mapping fast path: single Vec alloc, no raw_tuple.
4903                let mut out = Vec::with_capacity(schema_cols_len);
4904                for (i, (col, expr)) in column_meta.iter().zip(tuple).enumerate() {
4905                    let mut raw = literal_expr_to_value(expr)?;
4906                    if col.auto_increment && raw.is_null() {
4907                        let next = table.next_auto_value(i).ok_or_else(|| {
4908                            EngineError::Unsupported(alloc::format!(
4909                                "AUTO_INCREMENT applies to integer columns only (column `{}`)",
4910                                col.name
4911                            ))
4912                        })?;
4913                        raw = Value::BigInt(next);
4914                    }
4915                    out.push(coerce_value(raw, col.ty, &col.name, i)?);
4916                }
4917                out
4918            };
4919            all_values.push(values);
4920        }
4921        // Stage 2 — FK enforcement on the immutable catalog.
4922        // Non-lexical lifetimes release the mutable borrow on
4923        // `table` here since stage 1 was the last use. The
4924        // parent-table lookup runs before any row is committed.
4925        let uniqueness = table.schema().uniqueness_constraints.clone();
4926        let _ = table;
4927        if !fks.is_empty() {
4928            enforce_fk_inserts(self.active_catalog(), &stmt.table, &fks, &all_values)?;
4929        }
4930        // v7.13.0 — CHECK constraint enforcement (mailrs round-5 G3).
4931        enforce_check_constraints(self.active_catalog(), &stmt.table, &all_values)?;
4932        // v7.9.19 — composite UNIQUE / PRIMARY KEY enforcement.
4933        enforce_uniqueness_inserts(self.active_catalog(), &stmt.table, &uniqueness, &all_values)?;
4934        // v7.9.29 — CREATE UNIQUE INDEX [WHERE pred] enforcement.
4935        // Independent of table-level UniquenessConstraint (which
4936        // can't carry a predicate). Walks the table's indexes;
4937        // for each `is_unique` index, only rows whose
4938        // partial_predicate evaluates truthy are checked for
4939        // collision. mailrs K1.
4940        enforce_unique_index_inserts(self.active_catalog(), &stmt.table, &all_values)?;
4941        // v7.9.8 / v7.9.9 — ON CONFLICT handling.
4942        //   - `DO NOTHING` filters `all_values` to non-conflicting
4943        //     rows + drops within-batch duplicates.
4944        //   - `DO UPDATE SET …` ALSO filters, but for each
4945        //     conflicting row it queues an UPDATE on the existing
4946        //     row using the incoming row's values as `EXCLUDED.*`.
4947        let mut pending_updates: Vec<(usize, Vec<Value>)> = Vec::new();
4948        let mut skipped_count = 0usize;
4949        if let Some(clause) = &stmt.on_conflict {
4950            let conflict_cols = resolve_on_conflict_columns(
4951                self.active_catalog(),
4952                &stmt.table,
4953                clause.target_columns.as_slice(),
4954            )?;
4955            let mut kept: Vec<Vec<Value>> = Vec::with_capacity(all_values.len());
4956            let mut seen_keys: Vec<Vec<Value>> = Vec::new();
4957            for values in all_values {
4958                let key_tuple: Vec<&Value> = conflict_cols.iter().map(|&c| &values[c]).collect();
4959                // SQL spec: NULL in any conflict column means "no
4960                // conflict possible" (NULL ≠ NULL for uniqueness).
4961                let has_null_key = key_tuple.iter().any(|v| matches!(v, Value::Null));
4962                let collides_with_table = !has_null_key
4963                    && on_conflict_keys_exist(
4964                        self.active_catalog(),
4965                        &stmt.table,
4966                        &conflict_cols,
4967                        &key_tuple,
4968                    );
4969                let key_tuple_owned: Vec<Value> = key_tuple.iter().map(|v| (*v).clone()).collect();
4970                let collides_with_batch =
4971                    !has_null_key && seen_keys.iter().any(|k| k == &key_tuple_owned);
4972                let collides = collides_with_table || collides_with_batch;
4973                match (&clause.action, collides) {
4974                    (_, false) => {
4975                        seen_keys.push(key_tuple_owned);
4976                        kept.push(values);
4977                    }
4978                    (spg_sql::ast::OnConflictAction::Nothing, true) => {
4979                        skipped_count += 1;
4980                    }
4981                    (
4982                        spg_sql::ast::OnConflictAction::Update {
4983                            assignments,
4984                            where_,
4985                        },
4986                        true,
4987                    ) => {
4988                        if !collides_with_table {
4989                            skipped_count += 1;
4990                            continue;
4991                        }
4992                        let target_pos = lookup_row_position_by_keys(
4993                            self.active_catalog(),
4994                            &stmt.table,
4995                            &conflict_cols,
4996                            &key_tuple,
4997                        )
4998                        .ok_or_else(|| {
4999                            EngineError::Unsupported(
5000                                "ON CONFLICT DO UPDATE: conflict detected but row \
5001                                 position could not be resolved (cold-tier row?)"
5002                                    .into(),
5003                            )
5004                        })?;
5005                        let updated = apply_on_conflict_assignments(
5006                            self.active_catalog(),
5007                            &stmt.table,
5008                            target_pos,
5009                            &values,
5010                            assignments,
5011                            where_.as_ref(),
5012                        )?;
5013                        if let Some(new_row) = updated {
5014                            pending_updates.push((target_pos, new_row));
5015                        } else {
5016                            skipped_count += 1;
5017                        }
5018                    }
5019                }
5020            }
5021            all_values = kept;
5022        }
5023        // Stage 3 — insert all rows under a fresh mutable borrow.
5024        let table = self
5025            .active_catalog_mut()
5026            .get_mut(&stmt.table)
5027            .ok_or_else(|| {
5028                EngineError::Storage(StorageError::TableNotFound {
5029                    name: stmt.table.clone(),
5030                })
5031            })?;
5032        // v7.9.4 — keep RETURNING projection rows separate per
5033        // INSERT and per UPDATE branch so DO UPDATE pushes the new
5034        // post-update state, not the incoming-only values.
5035        let mut returning_rows: Vec<Vec<Value>> = Vec::new();
5036        // v7.12.7 — collect embedded SQL emitted by any trigger
5037        // fire across the row loop; engine drains the queue after
5038        // the table mut borrow drops.
5039        let mut deferred_embedded: Vec<triggers::DeferredEmbeddedStmt> = Vec::new();
5040        'rowloop: for values in all_values {
5041            let mut row = Row::new(values);
5042            // v7.12.4 — BEFORE INSERT row-level triggers. Each
5043            // trigger may rewrite NEW cells (e.g. populate
5044            // `search_vector := to_tsvector(...)`) and may return
5045            // NULL to skip the row entirely.
5046            for fd in &before_insert_triggers {
5047                let (outcome, deferred) = triggers::fire_row_trigger(
5048                    fd,
5049                    Some(row.clone()),
5050                    None,
5051                    &stmt.table,
5052                    &column_meta,
5053                    &[],
5054                    trigger_session_cfg.as_deref(),
5055                    false,
5056                )
5057                .map_err(|e| EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}"))))?;
5058                deferred_embedded.extend(deferred);
5059                match outcome {
5060                    triggers::TriggerOutcome::Row(r) => row = r,
5061                    triggers::TriggerOutcome::Skip => continue 'rowloop,
5062                }
5063            }
5064            if stmt.returning.is_some() {
5065                returning_rows.push(row.values.clone());
5066            }
5067            // v7.12.4 — clone for the AFTER trigger view; insert
5068            // moves the row into the table.
5069            let inserted = row.clone();
5070            table.insert(row)?;
5071            affected += 1;
5072            // v7.12.4 — AFTER INSERT row-level triggers fire post-
5073            // write. Return value is ignored (PG semantics); we
5074            // surface any error from the body up to the caller.
5075            for fd in &after_insert_triggers {
5076                let (_outcome, deferred) = triggers::fire_row_trigger(
5077                    fd,
5078                    Some(inserted.clone()),
5079                    None,
5080                    &stmt.table,
5081                    &column_meta,
5082                    &[],
5083                    trigger_session_cfg.as_deref(),
5084                    true,
5085                )
5086                .map_err(|e| EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}"))))?;
5087                deferred_embedded.extend(deferred);
5088            }
5089        }
5090        // v7.9.9 — apply ON CONFLICT DO UPDATE rewrites collected
5091        // in the conflict-resolution pass. update_row handles
5092        // index maintenance + body re-encoding.
5093        for (pos, new_row) in pending_updates {
5094            if stmt.returning.is_some() {
5095                returning_rows.push(new_row.clone());
5096            }
5097            table.update_row(pos, new_row)?;
5098            affected += 1;
5099        }
5100        let _ = skipped_count;
5101        // v7.12.7 — drop the table mut borrow and drain any
5102        // trigger-emitted embedded SQL queued during this INSERT.
5103        // The borrow has to release first because each deferred
5104        // stmt may UPDATE / INSERT / DELETE the same (or another)
5105        // table — including, in principle, this one.
5106        let _ = table;
5107        self.execute_deferred_trigger_stmts(deferred_embedded, CancelToken::none())?;
5108        // v7.9.4/v7.9.9 — RETURNING streams the rows that ended
5109        // up in the table after this statement (insert or
5110        // post-update on conflict).
5111        if let Some(items) = &stmt.returning {
5112            return self.build_returning_rows(&stmt.table, items, returning_rows);
5113        }
5114        // v6.2.1 — auto-analyze: track per-table modified-row
5115        // counter so the background sweep can decide when to
5116        // re-ANALYZE. Cheap path on the autocommit-wrap hot loop
5117        // — one BTreeMap entry update per INSERT batch.
5118        if !self.in_transaction() && affected > 0 {
5119            self.statistics
5120                .record_modifications(&stmt.table, affected as u64);
5121        }
5122        Ok(QueryResult::CommandOk {
5123            affected,
5124            modified_catalog: !self.in_transaction(),
5125        })
5126    }
5127
5128    /// v4.5: SELECT with cooperative cancellation. The token is
5129    /// honoured between UNION peers and inside the bare-SELECT row
5130    /// loop; HNSW kNN graph walks and the aggregate executor don't
5131    /// honour it yet (deferred — those paths bound their work
5132    /// internally by `LIMIT k` and `GROUP BY` cardinality).
5133    /// v6.10.2 — cold-tier time-travel scan. Resolves the segment
5134    /// by id, decodes each row body against the table's current
5135    /// schema, applies the SELECT's projection + optional WHERE +
5136    /// optional LIMIT, returns a `Rows` result. JOINs / aggregates
5137    /// / ORDER BY are unsupported on this path (STABILITY carve-
5138    /// out); operators wanting them should restore the segment
5139    /// into a regular table first.
5140    fn exec_select_as_of_segment(
5141        &self,
5142        stmt: &SelectStatement,
5143        from: &spg_sql::ast::FromClause,
5144        segment_id: u32,
5145    ) -> Result<QueryResult, EngineError> {
5146        // v6.10.2 scope: no joins, no aggregates, no ORDER BY,
5147        // no GROUP BY / HAVING / UNION / OFFSET / DISTINCT.
5148        if !from.joins.is_empty()
5149            || stmt.group_by.is_some()
5150            || stmt.having.is_some()
5151            || !stmt.unions.is_empty()
5152            || !stmt.order_by.is_empty()
5153            || stmt.offset.is_some()
5154            || stmt.distinct
5155            || aggregate::uses_aggregate(stmt)
5156        {
5157            return Err(EngineError::Unsupported(
5158                "AS OF SEGMENT supports SELECT projection + WHERE + LIMIT only \
5159                 (joins / aggregates / ORDER BY are STABILITY § \"Out of v6.10\")"
5160                    .into(),
5161            ));
5162        }
5163        let table = self
5164            .active_catalog()
5165            .get(&from.primary.name)
5166            .ok_or_else(|| StorageError::TableNotFound {
5167                name: from.primary.name.clone(),
5168            })?;
5169        let schema = table.schema().clone();
5170        let schema_cols = &schema.columns;
5171        let alias = from
5172            .primary
5173            .alias
5174            .as_deref()
5175            .unwrap_or(from.primary.name.as_str());
5176        let ctx = EvalContext::new(schema_cols, Some(alias));
5177        let seg = self
5178            .active_catalog()
5179            .cold_segment(segment_id)
5180            .ok_or_else(|| {
5181                EngineError::Unsupported(alloc::format!(
5182                    "AS OF SEGMENT: cold segment {segment_id} not registered"
5183                ))
5184            })?;
5185        let mut out_rows: Vec<Row> = Vec::new();
5186        let mut limit_remaining: Option<usize> =
5187            stmt.limit_literal().and_then(|n| usize::try_from(n).ok());
5188        for (_key, body) in seg.scan() {
5189            let (row, _consumed) =
5190                spg_storage::decode_row_body_dense(&body, &schema).map_err(EngineError::Storage)?;
5191            if let Some(where_expr) = &stmt.where_ {
5192                let cond = self.eval_expr_simple(where_expr, &row, &ctx)?;
5193                if !matches!(cond, Value::Bool(true)) {
5194                    continue;
5195                }
5196            }
5197            // Projection.
5198            let projected = self.project_row_simple(&row, &stmt.items, schema_cols, alias)?;
5199            out_rows.push(projected);
5200            if let Some(rem) = limit_remaining.as_mut() {
5201                if *rem == 0 {
5202                    out_rows.pop();
5203                    break;
5204                }
5205                *rem -= 1;
5206            }
5207        }
5208        // Output column schema: derive from SELECT items.
5209        let columns = self.derive_output_columns(&stmt.items, schema_cols, alias);
5210        Ok(QueryResult::Rows {
5211            columns,
5212            rows: out_rows,
5213        })
5214    }
5215
5216    /// v6.10.2 — simple-path WHERE eval that doesn't go through
5217    /// the correlated-subquery / Memoize machinery. AS OF SEGMENT
5218    /// scan paths predicate against a snapshot frozen segment, no
5219    /// cross-row state.
5220    fn eval_expr_simple(
5221        &self,
5222        expr: &Expr,
5223        row: &Row,
5224        ctx: &EvalContext,
5225    ) -> Result<Value, EngineError> {
5226        let cancel = CancelToken::none();
5227        self.eval_expr_with_correlated(expr, row, ctx, cancel, None)
5228    }
5229
5230    /// v7.9.4 — INSERT / UPDATE / DELETE RETURNING projector.
5231    /// Given the table name, the user-supplied projection items,
5232    /// and the mutated rows (post-insert / post-update values, or
5233    /// pre-delete snapshot), build a `QueryResult::Rows` whose
5234    /// schema describes the projected columns. Mailrs migration
5235    /// blocker #1.
5236    fn build_returning_rows(
5237        &self,
5238        table_name: &str,
5239        items: &[SelectItem],
5240        mutated_rows: Vec<Vec<Value>>,
5241    ) -> Result<QueryResult, EngineError> {
5242        let table = self.active_catalog().get(table_name).ok_or_else(|| {
5243            EngineError::Storage(StorageError::TableNotFound {
5244                name: table_name.into(),
5245            })
5246        })?;
5247        let schema_cols = table.schema().columns.clone();
5248        let columns = self.derive_output_columns(items, &schema_cols, table_name);
5249        let mut out_rows: Vec<Row> = Vec::with_capacity(mutated_rows.len());
5250        for values in mutated_rows {
5251            let row = Row::new(values);
5252            let projected = self.project_row_simple(&row, items, &schema_cols, table_name)?;
5253            out_rows.push(projected);
5254        }
5255        Ok(QueryResult::Rows {
5256            columns,
5257            rows: out_rows,
5258        })
5259    }
5260
5261    /// v6.10.2 — projection for AS OF SEGMENT. Resolves
5262    /// `SelectItem::Wildcard` to all schema columns and
5263    /// `SelectItem::Expr` via the regular eval path.
5264    fn project_row_simple(
5265        &self,
5266        row: &Row,
5267        items: &[SelectItem],
5268        schema_cols: &[ColumnSchema],
5269        alias: &str,
5270    ) -> Result<Row, EngineError> {
5271        let ctx = EvalContext::new(schema_cols, Some(alias));
5272        let cancel = CancelToken::none();
5273        let mut out_vals = Vec::new();
5274        for item in items {
5275            match item {
5276                SelectItem::Wildcard => {
5277                    out_vals.extend(row.values.iter().cloned());
5278                }
5279                SelectItem::Expr { expr, .. } => {
5280                    let v = self.eval_expr_with_correlated(expr, row, &ctx, cancel, None)?;
5281                    out_vals.push(v);
5282                }
5283            }
5284        }
5285        Ok(Row::new(out_vals))
5286    }
5287
5288    /// v6.10.2 — derive the output `ColumnSchema` list for an
5289    /// AS OF SEGMENT projection. Wildcards take the full schema;
5290    /// expressions take the alias if present or a synthetic
5291    /// `?column?` (PG convention) otherwise.
5292    fn derive_output_columns(
5293        &self,
5294        items: &[SelectItem],
5295        schema_cols: &[ColumnSchema],
5296        _alias: &str,
5297    ) -> Vec<ColumnSchema> {
5298        let mut out = Vec::new();
5299        for item in items {
5300            match item {
5301                SelectItem::Wildcard => {
5302                    out.extend(schema_cols.iter().cloned());
5303                }
5304                SelectItem::Expr { alias, .. } => {
5305                    let name = alias.clone().unwrap_or_else(|| "?column?".to_string());
5306                    // Default to Text; the caller's row values
5307                    // carry the actual type. v6.10.2 scope.
5308                    out.push(ColumnSchema::new(name, DataType::Text, true));
5309                }
5310            }
5311        }
5312        out
5313    }
5314
5315    fn exec_select_cancel(
5316        &self,
5317        stmt: &SelectStatement,
5318        cancel: CancelToken<'_>,
5319    ) -> Result<QueryResult, EngineError> {
5320        cancel.check()?;
5321        // v7.16.2 — information_schema / pg_catalog virtual
5322        // views (mailrs round-10 A.3). If the SELECT touches a
5323        // synthetic meta-table name (`__spg_info_*` /
5324        // `__spg_pg_*` — produced by the parser for
5325        // `information_schema.X` / `pg_catalog.X`), clone the
5326        // catalog, materialise the requested view as a real
5327        // temporary table, and re-execute against an enriched
5328        // engine. Same pattern as `exec_with_ctes` for CTEs.
5329        if !self.meta_views_materialised && select_references_meta_view(stmt) {
5330            return self.exec_select_with_meta_views(stmt, cancel);
5331        }
5332        // v6.10.2 — cold-tier time-travel short-circuit. When the
5333        // primary TableRef carries `AS OF SEGMENT '<id>'`, run a
5334        // dedicated cold-segment scan instead of the regular
5335        // hot+index path. The scope is intentionally narrow for
5336        // v6.10.2 — bare `SELECT * FROM <t> AS OF SEGMENT 'id'`,
5337        // optionally with a single-column-equality WHERE. JOINs /
5338        // aggregates / ORDER BY / subqueries on top of a time-
5339        // travelled scan are STABILITY § "Out of v6.10".
5340        if let Some(from) = &stmt.from
5341            && let Some(seg_id) = from.primary.as_of_segment
5342        {
5343            return self.exec_select_as_of_segment(stmt, from, seg_id);
5344        }
5345        // v6.2.0 / v6.5.0 — virtual-table short-circuits. Detected
5346        // pre-CTE because they don't read from the catalog and
5347        // shouldn't participate in regular FROM resolution.
5348        if let Some(from) = &stmt.from
5349            && from.joins.is_empty()
5350            && stmt.where_.is_none()
5351            && stmt.group_by.is_none()
5352            && stmt.having.is_none()
5353            && stmt.unions.is_empty()
5354            && stmt.order_by.is_empty()
5355            && stmt.limit.is_none()
5356            && stmt.offset.is_none()
5357            && !stmt.distinct
5358            && stmt.items.iter().all(|i| matches!(i, SelectItem::Wildcard))
5359        {
5360            let lower = from.primary.name.to_ascii_lowercase();
5361            match lower.as_str() {
5362                "spg_statistic" => return Ok(self.exec_spg_statistic()),
5363                // v6.5.0 — observability v2 virtual tables.
5364                "spg_stat_replication" => return Ok(self.exec_spg_stat_replication()),
5365                "spg_stat_segment" => return Ok(self.exec_spg_stat_segment()),
5366                "spg_stat_query" => return Ok(self.exec_spg_stat_query()),
5367                "spg_stat_activity" => return Ok(self.exec_spg_stat_activity()),
5368                "spg_audit_chain" => return Ok(self.exec_spg_audit_chain()),
5369                "spg_audit_verify" => return Ok(self.exec_spg_audit_verify()),
5370                "spg_table_ddl" => return Ok(self.exec_spg_table_ddl()),
5371                "spg_role_ddl" => return Ok(self.exec_spg_role_ddl()),
5372                "spg_database_ddl" => return Ok(self.exec_spg_database_ddl()),
5373                _ => {}
5374            }
5375        }
5376        // v4.11: CTEs materialise into a temporary enriched catalog
5377        // *before* anything else — the body SELECT can then refer
5378        // to CTE names via the regular FROM-clause resolution.
5379        // Uncorrelated only: each CTE body runs once against the
5380        // current catalog, not against later CTEs' results (left-
5381        // to-right materialisation would relax this, but we keep
5382        // it simple for v4.11 MVP).
5383        if !stmt.ctes.is_empty() {
5384            return self.exec_with_ctes(stmt, cancel);
5385        }
5386        // v4.10: subqueries (uncorrelated) are resolved here, before
5387        // the executor sees the row loop. We clone the statement so
5388        // we can mutate without disturbing the caller's AST — most
5389        // queries pass through with no subquery nodes and the clone
5390        // is cheap; with subqueries the materialisation cost
5391        // dominates anyway.
5392        let mut stmt_owned;
5393        let stmt_ref: &SelectStatement = if expr_tree_has_subquery(stmt) {
5394            stmt_owned = stmt.clone();
5395            self.resolve_select_subqueries(&mut stmt_owned, cancel)?;
5396            &stmt_owned
5397        } else {
5398            stmt
5399        };
5400        if stmt_ref.unions.is_empty() {
5401            return self.exec_bare_select_cancel(stmt_ref, cancel);
5402        }
5403        // UNION path: clone-strip the head into a bare block (its own
5404        // DISTINCT and any inner ORDER BY are dropped by parser rule —
5405        // the wrapper SelectStatement carries them), execute, then chain
5406        // peers with left-associative dedup semantics.
5407        let mut head = stmt_ref.clone();
5408        head.unions = Vec::new();
5409        head.order_by = Vec::new();
5410        head.limit = None;
5411        let QueryResult::Rows { columns, mut rows } =
5412            self.exec_bare_select_cancel(&head, cancel)?
5413        else {
5414            unreachable!("bare SELECT cannot return CommandOk")
5415        };
5416        for (kind, peer) in &stmt_ref.unions {
5417            let QueryResult::Rows {
5418                columns: peer_cols,
5419                rows: peer_rows,
5420            } = self.exec_bare_select_cancel(peer, cancel)?
5421            else {
5422                unreachable!("bare SELECT cannot return CommandOk")
5423            };
5424            if peer_cols.len() != columns.len() {
5425                return Err(EngineError::Unsupported(alloc::format!(
5426                    "UNION arity mismatch: head has {} columns, peer has {}",
5427                    columns.len(),
5428                    peer_cols.len()
5429                )));
5430            }
5431            rows.extend(peer_rows);
5432            if matches!(kind, UnionKind::Distinct) {
5433                rows = dedup_rows(rows);
5434            }
5435        }
5436        // ORDER BY at the top of a UNION applies to the combined result.
5437        // Eval against the projected schema (NOT the source table).
5438        if !stmt.order_by.is_empty() {
5439            let synth_ctx = EvalContext::new(&columns, None);
5440            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
5441            let mut tagged: Vec<(Vec<f64>, Row)> = Vec::with_capacity(rows.len());
5442            for r in rows {
5443                let keys = build_order_keys(&stmt.order_by, &r, &synth_ctx)?;
5444                tagged.push((keys, r));
5445            }
5446            sort_by_keys(&mut tagged, &descs);
5447            rows = tagged.into_iter().map(|(_, r)| r).collect();
5448        }
5449        apply_offset_and_limit(&mut rows, stmt.offset_literal(), stmt.limit_literal());
5450        Ok(QueryResult::Rows { columns, rows })
5451    }
5452
5453    #[allow(clippy::too_many_lines)]
5454    #[allow(clippy::too_many_lines)] // huge match — splitting fragments the planner
5455    /// v7.11.7 — execute `SELECT … FROM unnest(expr) [AS] alias …`.
5456    /// Synthesises a single-column virtual table whose column type
5457    /// is TEXT and whose rows are the array elements. Routes
5458    /// through the regular projection / WHERE / ORDER BY / LIMIT
5459    /// machinery so set-returning UNNEST composes naturally with
5460    /// the rest of the SELECT surface.
5461    fn exec_select_unnest(
5462        &self,
5463        stmt: &SelectStatement,
5464        primary: &TableRef,
5465        cancel: CancelToken<'_>,
5466    ) -> Result<QueryResult, EngineError> {
5467        let expr = primary
5468            .unnest_expr
5469            .as_deref()
5470            .expect("caller guards unnest_expr.is_some()");
5471        // Evaluate the array expression once. Empty schema / empty
5472        // row — uncorrelated UNNEST cannot reference outer columns.
5473        let empty_schema: alloc::vec::Vec<ColumnSchema> = alloc::vec::Vec::new();
5474        let ctx = EvalContext::new(&empty_schema, None);
5475        let dummy_row = Row::new(alloc::vec::Vec::new());
5476        // v7.11.13 — unnest dispatches per array element type so
5477        // INT[] / BIGINT[] surface their PG types in projection.
5478        let (elem_dtype, rows): (DataType, alloc::vec::Vec<Row>) =
5479            match eval::eval_expr(expr, &dummy_row, &ctx).map_err(EngineError::Eval)? {
5480                Value::Null => (DataType::Text, alloc::vec::Vec::new()),
5481                Value::TextArray(items) => {
5482                    let rows = items
5483                        .into_iter()
5484                        .map(|item| {
5485                            Row::new(alloc::vec![match item {
5486                                Some(s) => Value::Text(s),
5487                                None => Value::Null,
5488                            }])
5489                        })
5490                        .collect();
5491                    (DataType::Text, rows)
5492                }
5493                Value::IntArray(items) => {
5494                    let rows = items
5495                        .into_iter()
5496                        .map(|item| {
5497                            Row::new(alloc::vec![match item {
5498                                Some(n) => Value::Int(n),
5499                                None => Value::Null,
5500                            }])
5501                        })
5502                        .collect();
5503                    (DataType::Int, rows)
5504                }
5505                Value::BigIntArray(items) => {
5506                    let rows = items
5507                        .into_iter()
5508                        .map(|item| {
5509                            Row::new(alloc::vec![match item {
5510                                Some(n) => Value::BigInt(n),
5511                                None => Value::Null,
5512                            }])
5513                        })
5514                        .collect();
5515                    (DataType::BigInt, rows)
5516                }
5517                other => {
5518                    return Err(EngineError::Unsupported(alloc::format!(
5519                        "unnest() expects an array argument, got {:?}",
5520                        other.data_type()
5521                    )));
5522                }
5523            };
5524        let alias = primary
5525            .alias
5526            .clone()
5527            .unwrap_or_else(|| "unnest".to_string());
5528        // v7.13.2 — mailrs round-6 S5. Honour PG-standard
5529        // `UNNEST(arr) AS p(col_name)` column-list aliasing: the
5530        // first entry overrides the projected column's name.
5531        // Without the column list, fall back to the table alias
5532        // (pre-v7.13.2 behaviour).
5533        let col_name = primary
5534            .unnest_column_aliases
5535            .first()
5536            .cloned()
5537            .unwrap_or_else(|| alias.clone());
5538        let col_schema = ColumnSchema::new(col_name, elem_dtype, true);
5539        let schema_cols = alloc::vec![col_schema.clone()];
5540        let scan_ctx = EvalContext::new(&schema_cols, Some(&alias));
5541        // Apply WHERE.
5542        let filtered: alloc::vec::Vec<Row> = if let Some(w) = &stmt.where_ {
5543            let mut out = alloc::vec::Vec::with_capacity(rows.len());
5544            for row in rows {
5545                cancel.check()?;
5546                let v = eval::eval_expr(w, &row, &scan_ctx).map_err(EngineError::Eval)?;
5547                if matches!(v, Value::Bool(true)) {
5548                    out.push(row);
5549                }
5550            }
5551            out
5552        } else {
5553            rows
5554        };
5555        // Projection.
5556        let projection = build_projection(&stmt.items, &schema_cols, &alias)?;
5557        let mut projected_rows: alloc::vec::Vec<Row> =
5558            alloc::vec::Vec::with_capacity(filtered.len());
5559        for row in &filtered {
5560            let mut vals = alloc::vec::Vec::with_capacity(projection.len());
5561            for p in &projection {
5562                vals.push(eval::eval_expr(&p.expr, row, &scan_ctx).map_err(EngineError::Eval)?);
5563            }
5564            projected_rows.push(Row::new(vals));
5565        }
5566        // ORDER BY / LIMIT — apply on the projected rows (cheap;
5567        // unnest result sets are small by design).
5568        let columns: alloc::vec::Vec<ColumnSchema> = projection
5569            .iter()
5570            .map(|p| ColumnSchema::new(p.output_name.clone(), p.ty, p.nullable))
5571            .collect();
5572        // Re-evaluate ORDER BY against the source schema (pre-projection
5573        // so col refs by name still resolve through `scan_ctx`).
5574        if !stmt.order_by.is_empty() {
5575            let mut indexed: alloc::vec::Vec<(usize, Vec<Value>)> = filtered
5576                .iter()
5577                .enumerate()
5578                .map(|(i, r)| -> Result<_, EngineError> {
5579                    let keys: Result<Vec<Value>, EngineError> = stmt
5580                        .order_by
5581                        .iter()
5582                        .map(|ob| {
5583                            eval::eval_expr(&ob.expr, r, &scan_ctx).map_err(EngineError::Eval)
5584                        })
5585                        .collect();
5586                    Ok((i, keys?))
5587                })
5588                .collect::<Result<_, _>>()?;
5589            indexed.sort_by(|a, b| {
5590                for (idx, (ka, kb)) in a.1.iter().zip(b.1.iter()).enumerate() {
5591                    let mut cmp = value_cmp(ka, kb);
5592                    if stmt.order_by[idx].desc {
5593                        cmp = cmp.reverse();
5594                    }
5595                    if cmp != core::cmp::Ordering::Equal {
5596                        return cmp;
5597                    }
5598                }
5599                core::cmp::Ordering::Equal
5600            });
5601            projected_rows = indexed
5602                .into_iter()
5603                .map(|(i, _)| projected_rows[i].clone())
5604                .collect();
5605        }
5606        // LIMIT / OFFSET — apply at the tail.
5607        if let Some(offset) = stmt.offset_literal() {
5608            let off = (offset as usize).min(projected_rows.len());
5609            projected_rows.drain(..off);
5610        }
5611        if let Some(limit) = stmt.limit_literal() {
5612            projected_rows.truncate(limit as usize);
5613        }
5614        Ok(QueryResult::Rows {
5615            columns,
5616            rows: projected_rows,
5617        })
5618    }
5619
5620    fn exec_bare_select_cancel(
5621        &self,
5622        stmt: &SelectStatement,
5623        cancel: CancelToken<'_>,
5624    ) -> Result<QueryResult, EngineError> {
5625        // v7.16.2 — same meta-view dispatch as
5626        // `exec_select_cancel`, applied here too because
5627        // `subquery_replacement` enters this function directly
5628        // for Exists / ScalarSubquery / InSubquery resolution
5629        // (bypassing the top-level entry to avoid double
5630        // subquery walking). Without this dispatch the subquery
5631        // hits `__spg_info_columns` and reports TableNotFound.
5632        if !self.meta_views_materialised && select_references_meta_view(stmt) {
5633            return self.exec_select_with_meta_views(stmt, cancel);
5634        }
5635        // v4.12: window-function path. When the projection contains
5636        // any `name(args) OVER (...)` we route to the dedicated
5637        // executor — partition + sort + per-row window value before
5638        // the regular projection.
5639        if select_has_window(stmt) {
5640            return self.exec_select_with_window(stmt, cancel);
5641        }
5642        // Constant SELECT (no FROM) — evaluate each item once against an
5643        // empty dummy row. Useful for `SELECT 1`, `SELECT coalesce(...)`,
5644        // `SELECT '7'::INT`. Column references will surface as
5645        // ColumnNotFound on eval since the schema is empty.
5646        let Some(from) = &stmt.from else {
5647            let empty_schema: Vec<ColumnSchema> = Vec::new();
5648            let ctx = self.ev_ctx(&empty_schema, None);
5649            let projection = build_projection(&stmt.items, &empty_schema, "")?;
5650            let dummy_row = Row::new(Vec::new());
5651            let mut values = Vec::with_capacity(projection.len());
5652            for p in &projection {
5653                values.push(eval::eval_expr(&p.expr, &dummy_row, &ctx)?);
5654            }
5655            let columns: Vec<ColumnSchema> = projection
5656                .into_iter()
5657                .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
5658                .collect();
5659            return Ok(QueryResult::Rows {
5660                columns,
5661                rows: alloc::vec![Row::new(values)],
5662            });
5663        };
5664        // Multi-table FROM (one or more joined peers) goes through the
5665        // nested-loop join executor. Single-table FROM stays on the
5666        // existing scan + index-seek path.
5667        if !from.joins.is_empty() {
5668            return self.exec_joined_select(stmt, from);
5669        }
5670        // v7.11.7 — `FROM unnest(<expr>) [AS] <alias>`. Synthesise a
5671        // single-column table at SELECT entry by evaluating the
5672        // expression once against the empty row (UNNEST is
5673        // uncorrelated in v7.11; correlated / LATERAL unnest is a
5674        // v7.12 carve-out). Build a virtual `Table` in a heap-only
5675        // catalog, then route to the regular scan path.
5676        if from.primary.unnest_expr.is_some() {
5677            return self.exec_select_unnest(stmt, &from.primary, cancel);
5678        }
5679        let primary = &from.primary;
5680        let table = self.active_catalog().get(&primary.name).ok_or_else(|| {
5681            StorageError::TableNotFound {
5682                name: primary.name.clone(),
5683            }
5684        })?;
5685        let schema_cols = &table.schema().columns;
5686        // The qualifier accepted on column refs is the alias (if any) else the
5687        // bare table name.
5688        let alias = primary.alias.as_deref().unwrap_or(primary.name.as_str());
5689        let ctx = self.ev_ctx(schema_cols, Some(alias));
5690
5691        // NSW kNN planner: `ORDER BY col <-> literal LIMIT k` with no
5692        // WHERE and an NSW index on `col` skips the full scan. The
5693        // walk returns rows already in ascending-distance order, so
5694        // ORDER BY / LIMIT are honoured implicitly.
5695        if let Some(nsw_rows) = try_nsw_knn(stmt, table, schema_cols, alias) {
5696            return materialise_in_order(stmt, table, schema_cols, alias, &nsw_rows);
5697        }
5698
5699        // Index seek: if WHERE is `col = literal` (or commuted) and the
5700        // referenced column has an index, dispatch each locator through
5701        // the catalog (hot tier → borrow, cold tier → page-read +
5702        // decode) and iterate just those rows. Otherwise fall back to a
5703        // full scan over the hot tier (cold-tier rows are only reached
5704        // via index seek in v5.1 — full table scans against cold-tier
5705        // data ship in v5.2 with the freezer's per-segment scan API).
5706        let indexed_rows: Option<Vec<Cow<'_, Row>>> = stmt.where_.as_ref().and_then(|w| {
5707            // BTree / col=literal seek first — covers the v7.11.3 multi-
5708            // column AND case and the leading-column equality lookup.
5709            try_index_seek(w, schema_cols, self.active_catalog(), table, alias)
5710                .or_else(|| {
5711                    // v7.12.3 — GIN-accelerated `WHERE col @@
5712                    // tsquery` when the column has a `USING gin`
5713                    // index. Returns an over-approximate candidate
5714                    // set; the WHERE re-eval loop below verifies
5715                    // the full `@@` predicate per row.
5716                    try_gin_seek(w, schema_cols, self.active_catalog(), table, alias, &ctx)
5717                })
5718                .or_else(|| {
5719                    // v7.15.0 — trigram-GIN-accelerated
5720                    // `WHERE col LIKE / ILIKE '<pat>'` when the
5721                    // column has a `gin_trgm_ops` GIN index.
5722                    // Over-approximate candidate set; the WHERE
5723                    // re-eval verifies the LIKE per row.
5724                    try_trgm_seek(w, schema_cols, table, alias)
5725                })
5726        });
5727
5728        // Aggregate path: filter rows first, then hand off to the
5729        // aggregate executor which does its own projection + ORDER BY.
5730        if aggregate::uses_aggregate(stmt) {
5731            let mut filtered: Vec<&Row> = Vec::new();
5732            // v6.2.6 — Memoize: per-query LRU cache for correlated
5733            // scalar subqueries. Fresh per row-loop entry so each
5734            // SELECT execution gets an isolated cache.
5735            let mut memo = memoize::MemoizeCache::new();
5736            if let Some(rows) = &indexed_rows {
5737                for cow in rows {
5738                    let row = cow.as_ref();
5739                    if let Some(where_expr) = &stmt.where_ {
5740                        let cond = self.eval_expr_with_correlated(
5741                            where_expr,
5742                            row,
5743                            &ctx,
5744                            cancel,
5745                            Some(&mut memo),
5746                        )?;
5747                        if !matches!(cond, Value::Bool(true)) {
5748                            continue;
5749                        }
5750                    }
5751                    filtered.push(row);
5752                }
5753            } else {
5754                for i in 0..table.row_count() {
5755                    let row = &table.rows()[i];
5756                    if let Some(where_expr) = &stmt.where_ {
5757                        let cond = self.eval_expr_with_correlated(
5758                            where_expr,
5759                            row,
5760                            &ctx,
5761                            cancel,
5762                            Some(&mut memo),
5763                        )?;
5764                        if !matches!(cond, Value::Bool(true)) {
5765                            continue;
5766                        }
5767                    }
5768                    filtered.push(row);
5769                }
5770            }
5771            let mut agg = aggregate::run(stmt, &filtered, schema_cols, Some(alias))?;
5772            apply_offset_and_limit(&mut agg.rows, stmt.offset_literal(), stmt.limit_literal());
5773            return Ok(QueryResult::Rows {
5774                columns: agg.columns,
5775                rows: agg.rows,
5776            });
5777        }
5778
5779        let projection = build_projection(&stmt.items, schema_cols, alias)?;
5780
5781        // Materialise the filter pass into `(order_key, projected_row)`
5782        // tuples. The order key is `None` when there's no ORDER BY clause.
5783        let mut tagged: Vec<(Vec<f64>, Row)> = Vec::new();
5784        // v6.2.6 — Memoize per-row WHERE eval shares one cache.
5785        let mut memo = memoize::MemoizeCache::new();
5786        // Inline the per-row work in a closure so the indexed and full-
5787        // scan branches share the body.
5788        let mut process_row = |row: &Row, loop_idx: usize| -> Result<(), EngineError> {
5789            if loop_idx.is_multiple_of(256) {
5790                cancel.check()?;
5791            }
5792            if let Some(where_expr) = &stmt.where_ {
5793                let cond =
5794                    self.eval_expr_with_correlated(where_expr, row, &ctx, cancel, Some(&mut memo))?;
5795                if !matches!(cond, Value::Bool(true)) {
5796                    return Ok(());
5797                }
5798            }
5799            let mut values = Vec::with_capacity(projection.len());
5800            for p in &projection {
5801                values.push(eval::eval_expr(&p.expr, row, &ctx)?);
5802            }
5803            let order_keys = if stmt.order_by.is_empty() {
5804                Vec::new()
5805            } else {
5806                build_order_keys(&stmt.order_by, row, &ctx)?
5807            };
5808            tagged.push((order_keys, Row::new(values)));
5809            Ok(())
5810        };
5811        if let Some(rows) = &indexed_rows {
5812            for (loop_idx, cow) in rows.iter().enumerate() {
5813                process_row(cow.as_ref(), loop_idx)?;
5814            }
5815        } else {
5816            for i in 0..table.row_count() {
5817                process_row(&table.rows()[i], i)?;
5818            }
5819        }
5820
5821        if !stmt.order_by.is_empty() {
5822            // Partial-sort fast path: when LIMIT is small relative to
5823            // the row count, select_nth_unstable + sort just the
5824            // prefix is O(n + k log k) instead of O(n log n). DISTINCT
5825            // requires the full sort because de-dup happens after.
5826            let keep = if stmt.distinct {
5827                None
5828            } else {
5829                stmt.limit_literal()
5830                    .map(|l| l as usize + stmt.offset_literal().map_or(0, |o| o as usize))
5831            };
5832            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
5833            partial_sort_tagged(&mut tagged, keep, &descs);
5834        }
5835
5836        let mut output_rows: Vec<Row> = tagged.into_iter().map(|(_, r)| r).collect();
5837        if stmt.distinct {
5838            output_rows = dedup_rows(output_rows);
5839        }
5840        apply_offset_and_limit(
5841            &mut output_rows,
5842            stmt.offset_literal(),
5843            stmt.limit_literal(),
5844        );
5845
5846        let columns: Vec<ColumnSchema> = projection
5847            .into_iter()
5848            .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
5849            .collect();
5850
5851        Ok(QueryResult::Rows {
5852            columns,
5853            rows: output_rows,
5854        })
5855    }
5856
5857    /// Multi-table SELECT executor (one or more JOIN peers).
5858    ///
5859    /// v1.10 builds the joined row set up-front via nested-loop joins,
5860    /// then runs WHERE + projection + ORDER BY against the combined
5861    /// rows. No index seek. Aggregates and DISTINCT still work because
5862    /// the executor delegates projection through the same shared paths.
5863    #[allow(clippy::too_many_lines)]
5864    /// v7.13.2 — mailrs round-6 S5. Resolve a TableRef into an
5865    /// owned (rows, schema) pair. Catalog tables clone their hot
5866    /// rows + schema; UNNEST table refs evaluate their array
5867    /// expression once and synthesise a single-column row set
5868    /// using the same dispatch as `exec_select_unnest`. Used by
5869    /// the joined-select path so UNNEST can appear in any FROM
5870    /// position, not just as the primary.
5871    fn materialise_table_ref(
5872        &self,
5873        tref: &TableRef,
5874    ) -> Result<(Vec<Row>, Vec<ColumnSchema>), EngineError> {
5875        if let Some(expr) = tref.unnest_expr.as_deref() {
5876            let empty_schema: Vec<ColumnSchema> = Vec::new();
5877            let ctx = EvalContext::new(&empty_schema, None);
5878            let dummy_row = Row::new(Vec::new());
5879            let (elem_dtype, rows) =
5880                match eval::eval_expr(expr, &dummy_row, &ctx).map_err(EngineError::Eval)? {
5881                    Value::Null => (DataType::Text, Vec::new()),
5882                    Value::TextArray(items) => (
5883                        DataType::Text,
5884                        items
5885                            .into_iter()
5886                            .map(|item| {
5887                                Row::new(alloc::vec![match item {
5888                                    Some(s) => Value::Text(s),
5889                                    None => Value::Null,
5890                                }])
5891                            })
5892                            .collect(),
5893                    ),
5894                    Value::IntArray(items) => (
5895                        DataType::Int,
5896                        items
5897                            .into_iter()
5898                            .map(|item| {
5899                                Row::new(alloc::vec![match item {
5900                                    Some(n) => Value::Int(n),
5901                                    None => Value::Null,
5902                                }])
5903                            })
5904                            .collect(),
5905                    ),
5906                    Value::BigIntArray(items) => (
5907                        DataType::BigInt,
5908                        items
5909                            .into_iter()
5910                            .map(|item| {
5911                                Row::new(alloc::vec![match item {
5912                                    Some(n) => Value::BigInt(n),
5913                                    None => Value::Null,
5914                                }])
5915                            })
5916                            .collect(),
5917                    ),
5918                    other => {
5919                        return Err(EngineError::Unsupported(alloc::format!(
5920                            "unnest() expects an array argument, got {:?}",
5921                            other.data_type()
5922                        )));
5923                    }
5924                };
5925            let alias = tref.alias.clone().unwrap_or_else(|| "unnest".to_string());
5926            let col_name = tref.unnest_column_aliases.first().cloned().unwrap_or(alias);
5927            return Ok((
5928                rows,
5929                alloc::vec![ColumnSchema::new(col_name, elem_dtype, true)],
5930            ));
5931        }
5932        let table =
5933            self.active_catalog()
5934                .get(&tref.name)
5935                .ok_or_else(|| StorageError::TableNotFound {
5936                    name: tref.name.clone(),
5937                })?;
5938        let rows: Vec<Row> = table.rows().iter().cloned().collect();
5939        let cols = table.schema().columns.clone();
5940        Ok((rows, cols))
5941    }
5942
5943    fn exec_joined_select(
5944        &self,
5945        stmt: &SelectStatement,
5946        from: &FromClause,
5947    ) -> Result<QueryResult, EngineError> {
5948        // v7.13.2 — mailrs round-6 S5. UNNEST peers materialise
5949        // into virtual (rows, schema) sources alongside catalog
5950        // tables, so `FROM t, UNNEST(arr) AS p(col)` works in
5951        // any join-list position. The lookup helper handles both
5952        // shapes uniformly.
5953        let (primary_rows, primary_cols) = self.materialise_table_ref(&from.primary)?;
5954        let primary_alias = from
5955            .primary
5956            .alias
5957            .as_deref()
5958            .unwrap_or(from.primary.name.as_str())
5959            .to_string();
5960        // Owned (rows, schema) per peer — borrows from the catalog
5961        // would not survive UNNEST-side materialisation.
5962        #[allow(clippy::type_complexity)]
5963        let mut joined: Vec<(
5964            Vec<Row>,
5965            Vec<ColumnSchema>,
5966            String,
5967            JoinKind,
5968            Option<&Expr>,
5969        )> = Vec::new();
5970        for j in &from.joins {
5971            let (rows, cols) = self.materialise_table_ref(&j.table)?;
5972            let a = j
5973                .table
5974                .alias
5975                .as_deref()
5976                .unwrap_or(j.table.name.as_str())
5977                .to_string();
5978            joined.push((rows, cols, a, j.kind, j.on.as_ref()));
5979        }
5980
5981        // Build the combined schema: composite "alias.col" names so the
5982        // qualified-column resolver can find anything by exact match.
5983        let mut combined_schema: Vec<ColumnSchema> = Vec::new();
5984        for col in &primary_cols {
5985            combined_schema.push(ColumnSchema::new(
5986                alloc::format!("{primary_alias}.{}", col.name),
5987                col.ty,
5988                col.nullable,
5989            ));
5990        }
5991        for (_, cols, a, _, _) in &joined {
5992            for col in cols {
5993                combined_schema.push(ColumnSchema::new(
5994                    alloc::format!("{a}.{}", col.name),
5995                    col.ty,
5996                    col.nullable,
5997                ));
5998            }
5999        }
6000        let ctx = EvalContext::new(&combined_schema, None);
6001
6002        // Nested-loop join.
6003        let mut working: Vec<Row> = primary_rows;
6004        let mut produced_len = primary_cols.len();
6005        for (rrows, rcols, _, kind, on) in &joined {
6006            let right_arity = rcols.len();
6007            let mut next: Vec<Row> = Vec::new();
6008            for left in &working {
6009                let mut left_matched = false;
6010                for right in rrows {
6011                    let mut combined_vals = left.values.clone();
6012                    combined_vals.extend(right.values.iter().cloned());
6013                    // Pad combined to the eventual full width so the
6014                    // partial schema still matches positions used by ON.
6015                    let combined = Row::new(combined_vals);
6016                    let keep = if let Some(on_expr) = on {
6017                        let cond = eval::eval_expr(on_expr, &combined, &ctx)?;
6018                        matches!(cond, Value::Bool(true))
6019                    } else {
6020                        // CROSS / comma-list: every pair survives.
6021                        true
6022                    };
6023                    if keep {
6024                        next.push(combined);
6025                        left_matched = true;
6026                    }
6027                }
6028                if !left_matched && matches!(kind, JoinKind::Left) {
6029                    // LEFT OUTER JOIN: emit the left row with NULLs on
6030                    // the right side when no peer matched.
6031                    let mut combined_vals = left.values.clone();
6032                    for _ in 0..right_arity {
6033                        combined_vals.push(Value::Null);
6034                    }
6035                    next.push(Row::new(combined_vals));
6036                }
6037            }
6038            working = next;
6039            produced_len += right_arity;
6040            debug_assert!(produced_len <= combined_schema.len());
6041        }
6042
6043        // WHERE filter against combined rows.
6044        let mut filtered: Vec<Row> = Vec::new();
6045        for row in working {
6046            if let Some(where_expr) = &stmt.where_ {
6047                let cond = eval::eval_expr(where_expr, &row, &ctx)?;
6048                if !matches!(cond, Value::Bool(true)) {
6049                    continue;
6050                }
6051            }
6052            filtered.push(row);
6053        }
6054
6055        // Aggregate path: handle GROUP BY / aggregate calls over the
6056        // joined+filtered rows.
6057        if aggregate::uses_aggregate(stmt) {
6058            let refs: Vec<&Row> = filtered.iter().collect();
6059            let mut agg = aggregate::run(stmt, &refs, &combined_schema, None)?;
6060            apply_offset_and_limit(&mut agg.rows, stmt.offset_literal(), stmt.limit_literal());
6061            return Ok(QueryResult::Rows {
6062                columns: agg.columns,
6063                rows: agg.rows,
6064            });
6065        }
6066
6067        let projection = build_projection(&stmt.items, &combined_schema, "")?;
6068        let mut tagged: Vec<(Vec<f64>, Row)> = Vec::new();
6069        for row in &filtered {
6070            let mut values = Vec::with_capacity(projection.len());
6071            for p in &projection {
6072                values.push(eval::eval_expr(&p.expr, row, &ctx)?);
6073            }
6074            let order_keys = if stmt.order_by.is_empty() {
6075                Vec::new()
6076            } else {
6077                build_order_keys(&stmt.order_by, row, &ctx)?
6078            };
6079            tagged.push((order_keys, Row::new(values)));
6080        }
6081        if !stmt.order_by.is_empty() {
6082            let keep = if stmt.distinct {
6083                None
6084            } else {
6085                stmt.limit_literal()
6086                    .map(|l| l as usize + stmt.offset_literal().map_or(0, |o| o as usize))
6087            };
6088            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
6089            partial_sort_tagged(&mut tagged, keep, &descs);
6090        }
6091        let mut output_rows: Vec<Row> = tagged.into_iter().map(|(_, r)| r).collect();
6092        if stmt.distinct {
6093            output_rows = dedup_rows(output_rows);
6094        }
6095        apply_offset_and_limit(
6096            &mut output_rows,
6097            stmt.offset_literal(),
6098            stmt.limit_literal(),
6099        );
6100        let columns: Vec<ColumnSchema> = projection
6101            .into_iter()
6102            .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
6103            .collect();
6104        Ok(QueryResult::Rows {
6105            columns,
6106            rows: output_rows,
6107        })
6108    }
6109}
6110
/// One row-producing projection: an expression to evaluate, the resulting
/// column's user-visible name, its inferred type, and nullability.
///
/// The executors in this file evaluate `expr` once per source row and then
/// turn each item into an output `ColumnSchema` built from `output_name`,
/// `ty`, and `nullable`.
#[derive(Debug, Clone)]
struct ProjectedItem {
    /// Expression evaluated against each source row to produce the value.
    expr: Expr,
    /// Column name reported in the result set.
    output_name: String,
    /// Data type reported for the output column.
    ty: DataType,
    /// Whether the output column is reported as nullable.
    nullable: bool,
}
6120
6121/// Dedupe a row set, preserving first-seen order. `Row`'s `PartialEq` is
6122/// structural (`Vec<Value>` ⇒ pairwise `Value` equality), which gives SQL
6123/// `NULL = NULL → TRUE` and `NaN = NaN → FALSE`. The first agrees with
6124/// the spec's "two NULLs are not distinct"; the second is a tolerated
6125/// quirk for v1 (no NaN literals are reachable from the SQL surface).
6126fn dedup_rows(rows: Vec<Row>) -> Vec<Row> {
6127    let mut out: Vec<Row> = Vec::with_capacity(rows.len());
6128    for r in rows {
6129        if !out.iter().any(|seen| seen == &r) {
6130            out.push(r);
6131        }
6132    }
6133    out
6134}
6135
6136/// Coerce a `Value` to an `f64` sort key for ORDER BY. Numbers map directly;
6137/// NULL sorts last (treated as `+∞`); booleans are 0.0 / 1.0; text uses lex
6138/// order via the byte values; vectors are not sortable.
6139fn value_to_order_key(v: &Value) -> Result<f64, EngineError> {
6140    match v {
6141        Value::Null => Ok(f64::INFINITY),
6142        Value::SmallInt(n) => Ok(f64::from(*n)),
6143        Value::Int(n) => Ok(f64::from(*n)),
6144        Value::Date(d) => Ok(f64::from(*d)),
6145        #[allow(clippy::cast_precision_loss)]
6146        Value::Timestamp(t) => Ok(*t as f64),
6147        #[allow(clippy::cast_precision_loss)]
6148        Value::Numeric { scaled, scale } => {
6149            // Scaled integer / 10^scale, computed via f64 for sort
6150            // ordering only. Precision losses here only matter for
6151            // ORDER BY tie-breaks well past 15 significant digits.
6152            // `f64::powi` lives in std; we hand-roll the loop so the
6153            // no_std engine crate doesn't need it.
6154            let mut divisor = 1.0_f64;
6155            for _ in 0..*scale {
6156                divisor *= 10.0;
6157            }
6158            Ok((*scaled as f64) / divisor)
6159        }
6160        #[allow(clippy::cast_precision_loss)]
6161        Value::BigInt(n) => Ok(*n as f64),
6162        Value::Float(x) => Ok(*x),
6163        Value::Bool(b) => Ok(if *b { 1.0 } else { 0.0 }),
6164        Value::Text(s) => {
6165            // Lex order by codepoints — good enough for ORDER BY name.
6166            // Map first 8 bytes packed into u64 as a coarse key; ties fall to
6167            // partial_cmp Equal. v1.x can swap in a real string comparator.
6168            let mut key: u64 = 0;
6169            for &b in s.as_bytes().iter().take(8) {
6170                key = (key << 8) | u64::from(b);
6171            }
6172            #[allow(clippy::cast_precision_loss)]
6173            Ok(key as f64)
6174        }
6175        Value::Vector(_) | Value::Sq8Vector(_) | Value::HalfVector(_) => {
6176            Err(EngineError::Unsupported(
6177                "ORDER BY of a raw vector column is not meaningful — use `<->`".into(),
6178            ))
6179        }
6180        Value::Interval { .. } => Err(EngineError::Unsupported(
6181            "ORDER BY of an INTERVAL is not supported in v2.11 \
6182             (months vs micros has no single canonical ordering)"
6183                .into(),
6184        )),
6185        Value::Json(_) => Err(EngineError::Unsupported(
6186            "ORDER BY of a JSON value is not supported — cast the document to text first".into(),
6187        )),
6188        // v7.5.0 — Value is #[non_exhaustive]; future variants need
6189        // an explicit ORDER BY mapping. Surface as Unsupported until
6190        // engine support is added.
6191        _ => Err(EngineError::Unsupported(
6192            "ORDER BY of this value type is not supported".into(),
6193        )),
6194    }
6195}
6196
6197/// Try to plan a WHERE clause as an equality lookup against an existing
6198/// index. Returns the candidate row indices on success; `None` means the
6199/// caller should fall back to a full scan.
6200///
6201/// v0.8 recognises a single top-level `col = literal` (in either operand
6202/// order). AND chains and range scans land in later milestones.
6203/// Look for `ORDER BY col <dist-op> literal LIMIT k` against an
6204/// NSW-indexed vector column. Recognised distance ops: `<->` (L2),
6205/// `<#>` (inner product), `<=>` (cosine). When a WHERE clause is
6206/// present, the planner does an "over-fetch and filter" pass — it
6207/// asks the graph for `k * over_fetch` candidates, evaluates WHERE
6208/// against each, and trims back to `k`. Returns the row indices in
6209/// ascending-distance order when the plan applies.
6210fn try_nsw_knn(
6211    stmt: &SelectStatement,
6212    table: &Table,
6213    schema_cols: &[ColumnSchema],
6214    table_alias: &str,
6215) -> Option<Vec<usize>> {
6216    if stmt.distinct {
6217        return None;
6218    }
6219    let limit = usize::try_from(stmt.limit_literal()?).ok()?;
6220    if limit == 0 {
6221        return None;
6222    }
6223    // v6.4.0 — NSW kNN dispatch needs a single ORDER BY key on the
6224    // distance metric. Multi-key ORDER BY falls through to the
6225    // generic sort path.
6226    if stmt.order_by.len() != 1 {
6227        return None;
6228    }
6229    let order = &stmt.order_by[0];
6230    // NSW kNN returns rows ascending by distance — DESC inverts the
6231    // natural order, so the planner can't handle it without a sort
6232    // pass. Fall back to the generic ORDER BY path.
6233    if order.desc {
6234        return None;
6235    }
6236    let Expr::Binary { lhs, op, rhs } = &order.expr else {
6237        return None;
6238    };
6239    let metric = match op {
6240        BinOp::L2Distance => spg_storage::NswMetric::L2,
6241        BinOp::InnerProduct => spg_storage::NswMetric::InnerProduct,
6242        BinOp::CosineDistance => spg_storage::NswMetric::Cosine,
6243        _ => return None,
6244    };
6245    // Accept both `col <op> literal` and `literal <op> col`.
6246    let ((Expr::Column(col), literal) | (literal, Expr::Column(col))) =
6247        (lhs.as_ref(), rhs.as_ref())
6248    else {
6249        return None;
6250    };
6251    if let Some(q) = &col.qualifier
6252        && q != table_alias
6253    {
6254        return None;
6255    }
6256    let col_pos = schema_cols.iter().position(|s| s.name == col.name)?;
6257    let query = literal_to_vector(literal)?;
6258    let idx = spg_storage::nsw_index_on(table, col_pos)?;
6259    if let Some(where_expr) = &stmt.where_ {
6260        // Over-fetch and filter. The factor (10×) is a heuristic that
6261        // covers typical selectivity for the corpus tests; v2.x will
6262        // make it configurable.
6263        let over_fetch = limit.saturating_mul(10).max(NSW_OVER_FETCH_FLOOR);
6264        let candidates = spg_storage::nsw_query(table, &idx.name, &query, over_fetch, metric);
6265        let ctx = EvalContext::new(schema_cols, Some(table_alias));
6266        let mut kept: Vec<usize> = Vec::with_capacity(limit);
6267        for i in candidates {
6268            let row = &table.rows()[i];
6269            let cond = eval::eval_expr(where_expr, row, &ctx).ok()?;
6270            if matches!(cond, Value::Bool(true)) {
6271                kept.push(i);
6272                if kept.len() >= limit {
6273                    break;
6274                }
6275            }
6276        }
6277        Some(kept)
6278    } else {
6279        Some(spg_storage::nsw_query(
6280            table, &idx.name, &query, limit, metric,
6281        ))
6282    }
6283}
6284
/// Lower bound on the over-fetch pool when WHERE is present — even
/// for tiny `LIMIT 1` queries we keep enough candidates to absorb a
/// few WHERE rejections.
///
/// `try_nsw_knn` takes the larger of its 10× over-fetch estimate and this
/// floor when sizing the candidate request it sends to the NSW graph.
const NSW_OVER_FETCH_FLOOR: usize = 32;
6289
6290/// Pull a `Vec<f32>` out of a literal-or-cast expression. Returns
6291/// `None` for anything we can't fold at plan time.
6292fn literal_to_vector(e: &Expr) -> Option<Vec<f32>> {
6293    match e {
6294        Expr::Literal(Literal::Vector(v)) => Some(v.clone()),
6295        Expr::Cast { expr, .. } => literal_to_vector(expr),
6296        _ => None,
6297    }
6298}
6299
6300/// Materialise rows in a planner-supplied order (used by the NSW path)
6301/// without re-running ORDER BY. The projection + LIMIT slot mirror the
6302/// equivalent block in `exec_bare_select`.
6303fn materialise_in_order(
6304    stmt: &SelectStatement,
6305    table: &Table,
6306    schema_cols: &[ColumnSchema],
6307    table_alias: &str,
6308    ordered_rows: &[usize],
6309) -> Result<QueryResult, EngineError> {
6310    let ctx = EvalContext::new(schema_cols, Some(table_alias));
6311    let projection = build_projection(&stmt.items, schema_cols, table_alias)?;
6312    let mut output_rows: Vec<Row> = Vec::with_capacity(ordered_rows.len());
6313    for &i in ordered_rows {
6314        let row = &table.rows()[i];
6315        let mut values = Vec::with_capacity(projection.len());
6316        for p in &projection {
6317            values.push(eval::eval_expr(&p.expr, row, &ctx)?);
6318        }
6319        output_rows.push(Row::new(values));
6320    }
6321    apply_offset_and_limit(
6322        &mut output_rows,
6323        stmt.offset_literal(),
6324        stmt.limit_literal(),
6325    );
6326    let columns: Vec<ColumnSchema> = projection
6327        .into_iter()
6328        .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
6329        .collect();
6330    Ok(QueryResult::Rows {
6331        columns,
6332        rows: output_rows,
6333    })
6334}
6335
6336fn try_index_seek<'a>(
6337    where_expr: &Expr,
6338    schema_cols: &[ColumnSchema],
6339    catalog: &'a Catalog,
6340    table: &'a Table,
6341    table_alias: &str,
6342) -> Option<Vec<Cow<'a, Row>>> {
6343    // v7.11.3 — recurse through top-level `AND` so a PG-style
6344    // composite predicate like `WHERE id = 1 AND created_at > $1`
6345    // still hits the index on `id`. The caller re-applies the
6346    // full WHERE expression to each returned row, so dropping the
6347    // residual conjuncts here is correct — the index just narrows
6348    // the candidate set.
6349    if let Expr::Binary {
6350        lhs,
6351        op: BinOp::And,
6352        rhs,
6353    } = where_expr
6354    {
6355        // Try LHS first (typical convention: leading equality on
6356        // the indexed column comes first in user-written SQL).
6357        if let Some(rows) = try_index_seek(lhs, schema_cols, catalog, table, table_alias) {
6358            return Some(rows);
6359        }
6360        return try_index_seek(rhs, schema_cols, catalog, table, table_alias);
6361    }
6362    let Expr::Binary {
6363        lhs,
6364        op: BinOp::Eq,
6365        rhs,
6366    } = where_expr
6367    else {
6368        return None;
6369    };
6370    let (col_pos, value) = resolve_col_literal_pair(lhs, rhs, schema_cols, table_alias)
6371        .or_else(|| resolve_col_literal_pair(rhs, lhs, schema_cols, table_alias))?;
6372    let idx = table.index_on(col_pos)?;
6373    let key = IndexKey::from_value(&value)?;
6374    let locators = idx.lookup_eq(&key);
6375    let table_name = table.schema().name.as_str();
6376    // v5.1: each locator dispatches to either the hot tier (zero-
6377    // copy borrow of `table.rows()[i]`) or a cold-tier segment
6378    // (one page read + dense row decode, ~µs scale). Cold rows are
6379    // returned as `Cow::Owned` so the caller's `&Row` iteration
6380    // doesn't see a tier distinction; pre-freezer (no cold
6381    // segments loaded) every locator is `Hot` and every entry is
6382    // `Cow::Borrowed` — identical cost to the pre-v5.1 path.
6383    let mut out: Vec<Cow<'a, Row>> = Vec::with_capacity(locators.len());
6384    for loc in locators {
6385        match *loc {
6386            spg_storage::RowLocator::Hot(i) => {
6387                if let Some(row) = table.rows().get(i) {
6388                    out.push(Cow::Borrowed(row));
6389                }
6390            }
6391            spg_storage::RowLocator::Cold { segment_id, .. } => {
6392                if let Some(row) = catalog.resolve_cold_locator(table_name, segment_id, &key) {
6393                    out.push(Cow::Owned(row));
6394                }
6395            }
6396        }
6397    }
6398    Some(out)
6399}
6400
6401/// v7.12.3 — GIN-accelerated candidate seek for `WHERE col @@ <ts_query>`.
6402///
6403/// Recurses through top-level `AND` like [`try_index_seek`] so a
6404/// composite predicate `WHERE search_vector @@ q AND id > $1` still
6405/// hits the GIN index on `search_vector` — the caller re-applies the
6406/// full WHERE expression to each returned candidate, so dropping the
6407/// `id > $1` residual here stays semantically correct.
6408///
6409/// Returns `None` when:
6410///   - no leaf is a `col @@ <rhs>` shape on a GIN-indexed column;
6411///   - the RHS can't be const-evaluated to a `Value::TsQuery`
6412///     (typically because it references row columns);
6413///   - the resolved `TsQuery` uses query shapes the MVP doesn't
6414///     accelerate (`Not`, `Phrase` — those fall through to full scan).
6415///
6416/// On `Some(rows)` the caller iterates only `rows` and re-evaluates
6417/// the full `@@` predicate per row, so an over-approximate candidate
6418/// set is safe.
6419fn try_gin_seek<'a>(
6420    where_expr: &Expr,
6421    schema_cols: &[ColumnSchema],
6422    catalog: &'a Catalog,
6423    table: &'a Table,
6424    table_alias: &str,
6425    ctx: &eval::EvalContext<'_>,
6426) -> Option<Vec<Cow<'a, Row>>> {
6427    if let Expr::Binary {
6428        lhs,
6429        op: BinOp::And,
6430        rhs,
6431    } = where_expr
6432    {
6433        if let Some(rows) = try_gin_seek(lhs, schema_cols, catalog, table, table_alias, ctx) {
6434            return Some(rows);
6435        }
6436        return try_gin_seek(rhs, schema_cols, catalog, table, table_alias, ctx);
6437    }
6438    let Expr::Binary {
6439        lhs,
6440        op: BinOp::TsMatch,
6441        rhs,
6442    } = where_expr
6443    else {
6444        return None;
6445    };
6446    // Either side can be the column; pgvector idiom (`vec @@ q`)
6447    // hits the first arm, FROM-clause-derived (`plainto_tsquery($1)
6448    // q ... WHERE search_vector @@ q`) the same. CROSS JOIN derived
6449    // tables resolve `q` to a Column too.
6450    let (col_pos, query) = resolve_gin_col_query(lhs, rhs, schema_cols, table_alias, ctx)
6451        .or_else(|| resolve_gin_col_query(rhs, lhs, schema_cols, table_alias, ctx))?;
6452    let idx = table
6453        .indices()
6454        .iter()
6455        .find(|i| i.column_position == col_pos && i.is_gin())?;
6456    let candidates = gin_query_candidates(idx, &query)?;
6457    let _ = catalog; // cold-tier row resolution unused in MVP; see below.
6458    let mut out: Vec<Cow<'a, Row>> = Vec::with_capacity(candidates.len());
6459    for loc in candidates {
6460        match loc {
6461            spg_storage::RowLocator::Hot(i) => {
6462                if let Some(row) = table.rows().get(i) {
6463                    out.push(Cow::Borrowed(row));
6464                }
6465            }
6466            // GIN cold-tier rows in the MVP: skipped, matching the
6467            // full-scan `@@` path which itself only iterates
6468            // `table.rows()` (hot tier). When v7.13+ adds cold-tier
6469            // scan-time materialisation for `@@`, the parallel
6470            // resolution lands here; until then both paths see the
6471            // same hot-only candidate set so correctness is preserved.
6472            spg_storage::RowLocator::Cold { .. } => {}
6473        }
6474    }
6475    Some(out)
6476}
6477
6478/// v7.15.0 — trigram-GIN-accelerated candidate seek for
6479/// `WHERE col LIKE '<pat>'` and `WHERE col ILIKE '<pat>'` when
6480/// the column has a `gin_trgm_ops` GIN index.
6481///
6482/// Walks top-level `AND` so multi-predicate WHEREs (`col LIKE
6483/// 'foo%' AND id > 1`) still hit the trigram index; the caller
6484/// re-evaluates the full WHERE per candidate row, so dropping
6485/// non-LIKE conjuncts here stays semantically correct.
6486///
6487/// Returns `None` when:
6488///   - no leaf is `col LIKE/ILIKE <literal>` on a trigram-GIN-
6489///     indexed column;
6490///   - the pattern's literal runs are too short to constrain
6491///     (pattern decomposes into `< 3`-char runs, e.g. `%ab%`);
6492///   - the pattern doesn't const-evaluate to a TEXT.
6493fn try_trgm_seek<'a>(
6494    where_expr: &Expr,
6495    schema_cols: &[ColumnSchema],
6496    table: &'a Table,
6497    table_alias: &str,
6498) -> Option<Vec<Cow<'a, Row>>> {
6499    if let Expr::Binary {
6500        lhs,
6501        op: BinOp::And,
6502        rhs,
6503    } = where_expr
6504    {
6505        if let Some(rows) = try_trgm_seek(lhs, schema_cols, table, table_alias) {
6506            return Some(rows);
6507        }
6508        return try_trgm_seek(rhs, schema_cols, table, table_alias);
6509    }
6510    // LIKE node is what carries the column reference + pattern.
6511    // ILIKE is the same AST node — PG's LIKE/ILIKE both lower
6512    // through `Expr::Like { expr, pattern, negated }`. The trigram
6513    // index posting-list keys are already lower-cased and
6514    // case-folded, so we only need the pattern's literal runs.
6515    let Expr::Like { expr, pattern, .. } = where_expr else {
6516        return None;
6517    };
6518    // Column side.
6519    let Expr::Column(c) = expr.as_ref() else {
6520        return None;
6521    };
6522    if let Some(q) = &c.qualifier
6523        && q != table_alias
6524    {
6525        return None;
6526    }
6527    let col_pos = schema_cols
6528        .iter()
6529        .position(|s| s.name.eq_ignore_ascii_case(&c.name))?;
6530    // Index must exist on that column AND be a trigram-GIN.
6531    let idx = table
6532        .indices()
6533        .iter()
6534        .find(|i| i.column_position == col_pos && i.is_gin_trgm())?;
6535    // Pattern side must be a literal TEXT — anything else (column
6536    // ref, function call, parameter that hasn't been bound yet)
6537    // falls through to full scan.
6538    let Expr::Literal(spg_sql::ast::Literal::String(pat)) = pattern.as_ref() else {
6539        return None;
6540    };
6541    let trigrams = spg_storage::trgm::trigrams_from_like_pattern(pat)?;
6542    // Intersect every trigram's posting list. Empty intersection
6543    // → empty candidate set (caller short-circuits its row loop).
6544    let mut iter = trigrams.iter();
6545    let first = iter.next()?;
6546    let mut acc: Vec<spg_storage::RowLocator> = {
6547        let mut v = idx.gin_trgm_lookup(first).to_vec();
6548        v.sort_by_key(locator_sort_key);
6549        v.dedup_by_key(|l| locator_sort_key(l));
6550        v
6551    };
6552    for tri in iter {
6553        let mut next: Vec<spg_storage::RowLocator> = idx.gin_trgm_lookup(tri).to_vec();
6554        next.sort_by_key(locator_sort_key);
6555        next.dedup_by_key(|l| locator_sort_key(l));
6556        // Sorted-merge intersection.
6557        let mut merged: Vec<spg_storage::RowLocator> =
6558            Vec::with_capacity(acc.len().min(next.len()));
6559        let (mut i, mut j) = (0usize, 0usize);
6560        while i < acc.len() && j < next.len() {
6561            let lk = locator_sort_key(&acc[i]);
6562            let rk = locator_sort_key(&next[j]);
6563            match lk.cmp(&rk) {
6564                core::cmp::Ordering::Less => i += 1,
6565                core::cmp::Ordering::Greater => j += 1,
6566                core::cmp::Ordering::Equal => {
6567                    merged.push(acc[i]);
6568                    i += 1;
6569                    j += 1;
6570                }
6571            }
6572        }
6573        acc = merged;
6574        if acc.is_empty() {
6575            break;
6576        }
6577    }
6578    let mut out: Vec<Cow<'a, Row>> = Vec::with_capacity(acc.len());
6579    for loc in acc {
6580        if let spg_storage::RowLocator::Hot(i) = loc
6581            && let Some(row) = table.rows().get(i)
6582        {
6583            out.push(Cow::Borrowed(row));
6584        }
6585        // Cold-tier rows: skipped in MVP (same as try_gin_seek).
6586    }
6587    Some(out)
6588}
6589
6590/// v7.12.3 — extract `(column_position, TsQueryAst)` when one side of
6591/// the binary is a column reference to a GIN-indexed tsvector column
6592/// and the other side const-evaluates to a `Value::TsQuery`. Returns
6593/// `None` if the column reference is for the wrong table alias, or if
6594/// the RHS expression depends on row data.
6595fn resolve_gin_col_query(
6596    col_side: &Expr,
6597    query_side: &Expr,
6598    schema_cols: &[ColumnSchema],
6599    table_alias: &str,
6600    ctx: &eval::EvalContext<'_>,
6601) -> Option<(usize, spg_storage::TsQueryAst)> {
6602    let Expr::Column(c) = col_side else {
6603        return None;
6604    };
6605    if let Some(q) = &c.qualifier
6606        && q != table_alias
6607    {
6608        return None;
6609    }
6610    let pos = schema_cols.iter().position(|s| s.name == c.name)?;
6611    // Const-evaluate the query side with an empty row — fails fast
6612    // (with a `ColumnNotFound` / similar) if the expression actually
6613    // depends on row data, which is exactly the bail signal we want.
6614    let empty_row = Row::new(Vec::new());
6615    let v = eval::eval_expr(query_side, &empty_row, ctx).ok()?;
6616    let Value::TsQuery(q) = v else { return None };
6617    Some((pos, q))
6618}
6619
6620/// v7.12.3 — walk a `TsQueryAst` against an [`IndexKind::Gin`] index
6621/// to produce a candidate row-locator set. Returns `None` for query
6622/// shapes the MVP doesn't accelerate (`Not` / `Phrase` — both bail to
6623/// full scan since their semantics need either complementation across
6624/// the whole row set or positional verification beyond what the
6625/// posting list carries).
6626///
6627/// Candidate sets are over-approximate — the caller re-applies the
6628/// full `@@` predicate per row, so reporting "row was in some
6629/// posting list" without verifying positions / weights stays correct.
6630fn gin_query_candidates(
6631    idx: &spg_storage::Index,
6632    query: &spg_storage::TsQueryAst,
6633) -> Option<Vec<spg_storage::RowLocator>> {
6634    use spg_storage::TsQueryAst;
6635    match query {
6636        TsQueryAst::Term { word, .. } => {
6637            let mut v: Vec<spg_storage::RowLocator> = idx.gin_lookup_word(word).to_vec();
6638            v.sort_by_key(locator_sort_key);
6639            v.dedup_by_key(|l| locator_sort_key(l));
6640            Some(v)
6641        }
6642        TsQueryAst::And(l, r) => {
6643            let mut left = gin_query_candidates(idx, l)?;
6644            let mut right = gin_query_candidates(idx, r)?;
6645            left.sort_by_key(locator_sort_key);
6646            right.sort_by_key(locator_sort_key);
6647            // Sorted-merge intersection.
6648            let mut out: Vec<spg_storage::RowLocator> = Vec::new();
6649            let (mut i, mut j) = (0usize, 0usize);
6650            while i < left.len() && j < right.len() {
6651                let lk = locator_sort_key(&left[i]);
6652                let rk = locator_sort_key(&right[j]);
6653                match lk.cmp(&rk) {
6654                    core::cmp::Ordering::Less => i += 1,
6655                    core::cmp::Ordering::Greater => j += 1,
6656                    core::cmp::Ordering::Equal => {
6657                        out.push(left[i]);
6658                        i += 1;
6659                        j += 1;
6660                    }
6661                }
6662            }
6663            Some(out)
6664        }
6665        TsQueryAst::Or(l, r) => {
6666            let mut out = gin_query_candidates(idx, l)?;
6667            out.extend(gin_query_candidates(idx, r)?);
6668            out.sort_by_key(locator_sort_key);
6669            out.dedup_by_key(|l| locator_sort_key(l));
6670            Some(out)
6671        }
6672        // Not / Phrase bail to full scan in the MVP. Not needs
6673        // complementation against the whole row set (not represented
6674        // in the posting-list view); Phrase needs positional
6675        // verification beyond what `word → rows` carries.
6676        TsQueryAst::Not(_) | TsQueryAst::Phrase { .. } => None,
6677    }
6678}
6679
6680/// v7.12.3 — total ordering on `RowLocator` for sort/dedup purposes
6681/// inside the GIN intersection / union loops. Hot rows order by their
6682/// row index; Cold rows order after all Hot rows, then by
6683/// `(segment_id, the cold sub-key)`.
6684fn locator_sort_key(l: &spg_storage::RowLocator) -> (u8, u64, u64) {
6685    match *l {
6686        spg_storage::RowLocator::Hot(i) => (0, i as u64, 0),
6687        spg_storage::RowLocator::Cold {
6688            segment_id,
6689            page_offset,
6690        } => (1, u64::from(segment_id), u64::from(page_offset)),
6691    }
6692}
6693
6694/// v5.2.3: extract `(column_position, IndexKey)` when `where_expr`
6695/// is a simple `col = literal` predicate suitable for a `BTree` index
6696/// seek. Used by `exec_update_cancel` / `exec_delete_cancel` to
6697/// decide whether a write touches a cold-tier row (which requires
6698/// promote-on-write / shadow-on-delete) before falling through to
6699/// the hot-tier row walk.
6700///
6701/// Returns `None` for any predicate shape the planner can't push
6702/// down to an index seek — complex WHERE clauses always take the
6703/// hot-only path (cold rows are immutable to non-indexed writes
6704/// until a future scan-fanout sub-version).
6705fn try_pk_predicate(
6706    where_expr: &Expr,
6707    schema_cols: &[ColumnSchema],
6708    table_alias: &str,
6709) -> Option<(usize, IndexKey)> {
6710    let Expr::Binary {
6711        lhs,
6712        op: BinOp::Eq,
6713        rhs,
6714    } = where_expr
6715    else {
6716        return None;
6717    };
6718    let (col_pos, value) = resolve_col_literal_pair(lhs, rhs, schema_cols, table_alias)
6719        .or_else(|| resolve_col_literal_pair(rhs, lhs, schema_cols, table_alias))?;
6720    let key = IndexKey::from_value(&value)?;
6721    Some((col_pos, key))
6722}
6723
6724fn resolve_col_literal_pair(
6725    col_side: &Expr,
6726    lit_side: &Expr,
6727    schema_cols: &[ColumnSchema],
6728    table_alias: &str,
6729) -> Option<(usize, Value)> {
6730    let Expr::Column(c) = col_side else {
6731        return None;
6732    };
6733    if let Some(q) = &c.qualifier
6734        && q != table_alias
6735    {
6736        return None;
6737    }
6738    let pos = schema_cols.iter().position(|s| s.name == c.name)?;
6739    let Expr::Literal(l) = lit_side else {
6740        return None;
6741    };
6742    let v = match l {
6743        Literal::Integer(n) => {
6744            if let Ok(small) = i32::try_from(*n) {
6745                Value::Int(small)
6746            } else {
6747                Value::BigInt(*n)
6748            }
6749        }
6750        Literal::Float(x) => Value::Float(*x),
6751        Literal::String(s) => Value::Text(s.clone()),
6752        Literal::Bool(b) => Value::Bool(*b),
6753        Literal::Null => Value::Null,
6754        // Vector and Interval literals can't be used as B-tree index keys.
6755        // Tell the planner to fall back to full-scan.
6756        Literal::Vector(_) | Literal::Interval { .. } => return None,
6757    };
6758    Some((pos, v))
6759}
6760
6761/// Find the schema entry that a SELECT-list `Expr::Column` refers to.
6762/// Mirrors `resolve_column` in `eval.rs`, but returns a proper
6763/// `EngineError` so the projection-build path keeps `UnknownQualifier`
6764/// vs `ColumnNotFound` distinct.
6765fn resolve_projection_column<'a>(
6766    c: &ColumnName,
6767    schema_cols: &'a [ColumnSchema],
6768    table_alias: &str,
6769) -> Result<&'a ColumnSchema, EngineError> {
6770    if let Some(q) = &c.qualifier {
6771        let composite = alloc::format!("{q}.{name}", name = c.name);
6772        if let Some(s) = schema_cols.iter().find(|s| s.name == composite) {
6773            return Ok(s);
6774        }
6775        // Single-table case: the qualifier may equal the active alias —
6776        // then look for the bare column name.
6777        if q == table_alias
6778            && let Some(s) = schema_cols.iter().find(|s| s.name == c.name)
6779        {
6780            return Ok(s);
6781        }
6782        // For multi-table schemas the qualifier is unknown only if no
6783        // column bears the "<q>." prefix. For single-table, the alias
6784        // mismatch alone is enough.
6785        let prefix = alloc::format!("{q}.");
6786        let qualifier_known =
6787            q == table_alias || schema_cols.iter().any(|s| s.name.starts_with(&prefix));
6788        if !qualifier_known {
6789            return Err(EngineError::Eval(EvalError::UnknownQualifier {
6790                qualifier: q.clone(),
6791            }));
6792        }
6793        return Err(EngineError::Eval(EvalError::ColumnNotFound {
6794            name: c.name.clone(),
6795        }));
6796    }
6797    if let Some(s) = schema_cols.iter().find(|s| s.name == c.name) {
6798        return Ok(s);
6799    }
6800    let suffix = alloc::format!(".{name}", name = c.name);
6801    let mut matches = schema_cols.iter().filter(|s| s.name.ends_with(&suffix));
6802    let first = matches.next();
6803    let extra = matches.next();
6804    match (first, extra) {
6805        (Some(s), None) => Ok(s),
6806        (Some(_), Some(_)) => Err(EngineError::Eval(EvalError::TypeMismatch {
6807            detail: alloc::format!("ambiguous column reference: {}", c.name),
6808        })),
6809        _ => Err(EngineError::Eval(EvalError::ColumnNotFound {
6810            name: c.name.clone(),
6811        })),
6812    }
6813}
6814
6815fn build_projection(
6816    items: &[SelectItem],
6817    schema_cols: &[ColumnSchema],
6818    table_alias: &str,
6819) -> Result<Vec<ProjectedItem>, EngineError> {
6820    let mut out = Vec::new();
6821    for item in items {
6822        match item {
6823            SelectItem::Wildcard => {
6824                for col in schema_cols {
6825                    out.push(ProjectedItem {
6826                        expr: Expr::Column(ColumnName {
6827                            qualifier: None,
6828                            name: col.name.clone(),
6829                        }),
6830                        output_name: col.name.clone(),
6831                        ty: col.ty,
6832                        nullable: col.nullable,
6833                    });
6834                }
6835            }
6836            SelectItem::Expr { expr, alias } => {
6837                // Plain column ref keeps full schema info (real type +
6838                // nullability). Compound expressions evaluate fine but have
6839                // no static type — surface them as nullable TEXT, which is
6840                // what most clients render anyway.
6841                if let Expr::Column(c) = expr {
6842                    let sch = resolve_projection_column(c, schema_cols, table_alias)?;
6843                    let output_name = alias.clone().unwrap_or_else(|| c.name.clone());
6844                    out.push(ProjectedItem {
6845                        expr: expr.clone(),
6846                        output_name,
6847                        ty: sch.ty,
6848                        nullable: sch.nullable,
6849                    });
6850                } else {
6851                    let output_name = alias.clone().unwrap_or_else(|| expr.to_string());
6852                    out.push(ProjectedItem {
6853                        expr: expr.clone(),
6854                        output_name,
6855                        ty: DataType::Text,
6856                        nullable: true,
6857                    });
6858                }
6859            }
6860        }
6861    }
6862    Ok(out)
6863}
6864
6865/// Promote an integer to a NUMERIC value at the requested scale.
6866/// Rejects values that, after scaling, would overflow the column's
6867/// precision budget.
6868fn numeric_from_integer(
6869    n: i128,
6870    precision: u8,
6871    scale: u8,
6872    col_name: &str,
6873) -> Result<Value, EngineError> {
6874    let factor = pow10_i128(scale);
6875    let scaled = n.checked_mul(factor).ok_or_else(|| {
6876        EngineError::Unsupported(alloc::format!(
6877            "integer overflow scaling value for column `{col_name}` to scale {scale}"
6878        ))
6879    })?;
6880    check_precision(scaled, precision, col_name)?;
6881    Ok(Value::Numeric { scaled, scale })
6882}
6883
6884/// Float → NUMERIC. Uses round-half-away-from-zero on `x * 10^scale`,
6885/// then verifies the result fits the column's precision.
6886#[allow(clippy::cast_precision_loss, clippy::cast_possible_truncation)]
6887fn numeric_from_float(
6888    x: f64,
6889    precision: u8,
6890    scale: u8,
6891    col_name: &str,
6892) -> Result<Value, EngineError> {
6893    if !x.is_finite() {
6894        return Err(EngineError::Unsupported(alloc::format!(
6895            "cannot store non-finite float in NUMERIC column `{col_name}`"
6896        )));
6897    }
6898    let mut factor = 1.0_f64;
6899    for _ in 0..scale {
6900        factor *= 10.0;
6901    }
6902    // Round half-away-from-zero by biasing then casting (`as i128`
6903    // truncates toward zero, so the bias + truncation gives the
6904    // desired rounding). `f64::floor` / `ceil` live in std; we don't
6905    // need them — the cast handles the truncation step.
6906    let shifted = x * factor;
6907    let biased = if shifted >= 0.0 {
6908        shifted + 0.5
6909    } else {
6910        shifted - 0.5
6911    };
6912    // Range-check before casting back to i128 — the cast itself is
6913    // saturating in Rust, which would silently truncate huge inputs.
6914    if !(-1e38..=1e38).contains(&biased) {
6915        return Err(EngineError::Unsupported(alloc::format!(
6916            "value {x} overflows NUMERIC range for column `{col_name}`"
6917        )));
6918    }
6919    let scaled = biased as i128;
6920    check_precision(scaled, precision, col_name)?;
6921    Ok(Value::Numeric { scaled, scale })
6922}
6923
6924/// Move a Numeric value from `src_scale` to `dst_scale`. Going up
6925/// multiplies by 10; going down rounds half-away-from-zero.
6926fn numeric_rescale(
6927    scaled: i128,
6928    src_scale: u8,
6929    precision: u8,
6930    dst_scale: u8,
6931    col_name: &str,
6932) -> Result<Value, EngineError> {
6933    let new_scaled = if dst_scale >= src_scale {
6934        let bump = pow10_i128(dst_scale - src_scale);
6935        scaled.checked_mul(bump).ok_or_else(|| {
6936            EngineError::Unsupported(alloc::format!(
6937                "overflow rescaling NUMERIC for column `{col_name}`"
6938            ))
6939        })?
6940    } else {
6941        let drop = pow10_i128(src_scale - dst_scale);
6942        let half = drop / 2;
6943        if scaled >= 0 {
6944            (scaled + half) / drop
6945        } else {
6946            (scaled - half) / drop
6947        }
6948    };
6949    check_precision(new_scaled, precision, col_name)?;
6950    Ok(Value::Numeric {
6951        scaled: new_scaled,
6952        scale: dst_scale,
6953    })
6954}
6955
6956/// Drop the fractional part of a scaled integer, returning the integer
6957/// portion (toward zero). Used for NUMERIC → INT casts.
6958const fn numeric_truncate_to_integer(scaled: i128, scale: u8) -> i128 {
6959    if scale == 0 {
6960        return scaled;
6961    }
6962    let factor = pow10_i128_const(scale);
6963    scaled / factor
6964}
6965
6966/// Verify a scaled NUMERIC value fits the column's declared precision.
6967/// `precision == 0` is the "unconstrained" form (bare `NUMERIC`); we
6968/// skip the check there.
6969fn check_precision(scaled: i128, precision: u8, col_name: &str) -> Result<(), EngineError> {
6970    if precision == 0 {
6971        return Ok(());
6972    }
6973    let limit = pow10_i128(precision);
6974    if scaled.unsigned_abs() >= limit.unsigned_abs() {
6975        return Err(EngineError::Unsupported(alloc::format!(
6976            "NUMERIC value exceeds precision {precision} for column `{col_name}`"
6977        )));
6978    }
6979    Ok(())
6980}
6981
/// `10^p` as an `i128`, usable in `const` contexts.
///
/// Performs `p` successive multiplications by ten starting from 1.
const fn pow10_i128_const(p: u8) -> i128 {
    let mut result: i128 = 1;
    let mut remaining = p;
    while remaining > 0 {
        result *= 10;
        remaining -= 1;
    }
    result
}
6991
/// `10^p` as an `i128`. Non-`const` entry point that forwards directly
/// to `pow10_i128_const`.
fn pow10_i128(p: u8) -> i128 {
    pow10_i128_const(p)
}
6995
// NOTE(review): the two paragraphs below describe statement-rewrite
// helpers (clock-call substitution and subquery materialisation), not
// the `impl Engine` block that follows. They look like orphaned doc
// comments — confirm where those helpers live and move the text there.
//
// Walk a parsed `Statement`, swapping any `NOW()` /
// `CURRENT_TIMESTAMP()` / `CURRENT_DATE()` function calls for a
// literal cast that wraps the engine's per-statement clock reading.
// When `now_micros` is `None`, calls stay as-is and surface as
// `unknown function` at eval time — keeps the error path explicit.
//
// v4.10: pre-walk the WHERE / projection / etc. of a SELECT and
// replace every subquery node with a materialised literal. SPG
// only supports uncorrelated subqueries — the inner SELECT does
// not see outer-row columns, so the result is the same for every
// outer row and can be evaluated once.
//
// Returns the rewritten statement; the caller passes this to the
// regular row-loop executor which no longer sees Subquery nodes
// in its tree.
7010impl Engine {
7011    /// v4.12 window executor. Implements `ROW_NUMBER` / `RANK` /
7012    /// `DENSE_RANK` and the partition-aware aggregates `SUM` /
7013    /// `AVG` / `COUNT` / `MIN` / `MAX`. The plan is:
7014    /// 1. Apply the WHERE filter.
7015    /// 2. For each unique `WindowFunction` node in the projection,
7016    ///    partition + sort, compute the per-row value.
7017    /// 3. Append the window values as synthetic columns (`__win_N`)
7018    ///    to the row schema.
7019    /// 4. Rewrite the projection to read those columns.
7020    /// 5. Hand off to the regular project / ORDER BY / LIMIT pipe.
7021    #[allow(
7022        clippy::too_many_lines,
7023        clippy::type_complexity,
7024        clippy::needless_range_loop
7025    )] // window-eval is one cohesive pipe; splitting fragments
7026    fn exec_select_with_window(
7027        &self,
7028        stmt: &SelectStatement,
7029        cancel: CancelToken<'_>,
7030    ) -> Result<QueryResult, EngineError> {
7031        let from = stmt.from.as_ref().ok_or_else(|| {
7032            EngineError::Unsupported("window functions require a FROM clause".into())
7033        })?;
7034        // For v4.12 we only support a single-table FROM. Joins +
7035        // windows is queued for v5.x.
7036        if !from.joins.is_empty() {
7037            return Err(EngineError::Unsupported(
7038                "JOIN with window functions not yet supported".into(),
7039            ));
7040        }
7041        let primary = &from.primary;
7042        let table = self.active_catalog().get(&primary.name).ok_or_else(|| {
7043            StorageError::TableNotFound {
7044                name: primary.name.clone(),
7045            }
7046        })?;
7047        let alias = primary.alias.as_deref().unwrap_or(primary.name.as_str());
7048        let schema_cols = &table.schema().columns;
7049        let ctx = self.ev_ctx(schema_cols, Some(alias));
7050
7051        // 1) Filter pass.
7052        let mut filtered: Vec<&Row> = Vec::new();
7053        for (i, row) in table.rows().iter().enumerate() {
7054            if i.is_multiple_of(256) {
7055                cancel.check()?;
7056            }
7057            if let Some(w) = &stmt.where_ {
7058                let cond = eval::eval_expr(w, row, &ctx)?;
7059                if !matches!(cond, Value::Bool(true)) {
7060                    continue;
7061                }
7062            }
7063            filtered.push(row);
7064        }
7065        let n_rows = filtered.len();
7066
7067        // 2) Collect unique window function nodes from projection.
7068        let mut window_nodes: Vec<Expr> = Vec::new();
7069        for item in &stmt.items {
7070            if let SelectItem::Expr { expr, .. } = item {
7071                collect_window_nodes(expr, &mut window_nodes);
7072            }
7073        }
7074
7075        // 3) For each window, compute per-row value.
7076        // Index: same order as window_nodes; for row i, win_vals[w][i].
7077        let mut win_vals: Vec<Vec<Value>> = Vec::with_capacity(window_nodes.len());
7078        for wnode in &window_nodes {
7079            let Expr::WindowFunction {
7080                name,
7081                args,
7082                partition_by,
7083                order_by,
7084                frame,
7085                null_treatment,
7086            } = wnode
7087            else {
7088                unreachable!("collect_window_nodes pushes only WindowFunction");
7089            };
7090            // Compute (partition_key, order_key, original_index) for each row.
7091            let mut indexed: Vec<(Vec<Value>, Vec<(Value, bool)>, usize)> =
7092                Vec::with_capacity(n_rows);
7093            for (i, row) in filtered.iter().enumerate() {
7094                let pkey: Vec<Value> = partition_by
7095                    .iter()
7096                    .map(|p| eval::eval_expr(p, row, &ctx))
7097                    .collect::<Result<_, _>>()?;
7098                let okey: Vec<(Value, bool)> = order_by
7099                    .iter()
7100                    .map(|(e, desc)| eval::eval_expr(e, row, &ctx).map(|v| (v, *desc)))
7101                    .collect::<Result<_, _>>()?;
7102                indexed.push((pkey, okey, i));
7103            }
7104            // Sort by (partition_key, order_key). Partition key uses
7105            // a stable encoded form; order key respects ASC/DESC.
7106            indexed.sort_by(|a, b| {
7107                let p_cmp = partition_key_cmp(&a.0, &b.0);
7108                if p_cmp != core::cmp::Ordering::Equal {
7109                    return p_cmp;
7110                }
7111                order_key_cmp(&a.1, &b.1)
7112            });
7113            // Per-partition compute.
7114            let mut out_vals: Vec<Value> = alloc::vec![Value::Null; n_rows];
7115            let mut p_start = 0;
7116            while p_start < indexed.len() {
7117                let mut p_end = p_start + 1;
7118                while p_end < indexed.len()
7119                    && partition_key_cmp(&indexed[p_start].0, &indexed[p_end].0)
7120                        == core::cmp::Ordering::Equal
7121                {
7122                    p_end += 1;
7123                }
7124                // Compute the function within this partition slice.
7125                compute_window_partition(
7126                    name,
7127                    args,
7128                    !order_by.is_empty(),
7129                    frame.as_ref(),
7130                    *null_treatment,
7131                    &indexed[p_start..p_end],
7132                    &filtered,
7133                    &ctx,
7134                    &mut out_vals,
7135                )?;
7136                p_start = p_end;
7137            }
7138            win_vals.push(out_vals);
7139        }
7140
7141        // 4) Build extended schema: original columns + synthetic.
7142        let mut ext_cols = schema_cols.clone();
7143        for i in 0..window_nodes.len() {
7144            ext_cols.push(ColumnSchema::new(
7145                alloc::format!("__win_{i}"),
7146                DataType::Text, // type doesn't matter for projection eval
7147                true,
7148            ));
7149        }
7150        // 5) Build extended rows: each row gets its window values appended.
7151        let mut ext_rows: Vec<Row> = Vec::with_capacity(n_rows);
7152        for i in 0..n_rows {
7153            let mut values = filtered[i].values.clone();
7154            for w in 0..window_nodes.len() {
7155                values.push(win_vals[w][i].clone());
7156            }
7157            ext_rows.push(Row::new(values));
7158        }
7159        // 6) Rewrite the projection: WindowFunction nodes → Column(__win_N).
7160        let mut rewritten_items: Vec<SelectItem> = Vec::with_capacity(stmt.items.len());
7161        for item in &stmt.items {
7162            let new_item = match item {
7163                SelectItem::Wildcard => SelectItem::Wildcard,
7164                SelectItem::Expr { expr, alias } => {
7165                    let mut e = expr.clone();
7166                    rewrite_window_to_columns(&mut e, &window_nodes);
7167                    SelectItem::Expr {
7168                        expr: e,
7169                        alias: alias.clone(),
7170                    }
7171                }
7172            };
7173            rewritten_items.push(new_item);
7174        }
7175
7176        // 7) Project into final rows.
7177        let ext_ctx = EvalContext::new(&ext_cols, Some(alias));
7178        let projection = build_projection(&rewritten_items, &ext_cols, alias)?;
7179        let mut tagged: Vec<(Vec<f64>, Row)> = Vec::with_capacity(n_rows);
7180        for (i, row) in ext_rows.iter().enumerate() {
7181            if i.is_multiple_of(256) {
7182                cancel.check()?;
7183            }
7184            let mut values = Vec::with_capacity(projection.len());
7185            for p in &projection {
7186                values.push(eval::eval_expr(&p.expr, row, &ext_ctx)?);
7187            }
7188            let order_keys = if stmt.order_by.is_empty() {
7189                Vec::new()
7190            } else {
7191                let mut keys = Vec::with_capacity(stmt.order_by.len());
7192                for o in &stmt.order_by {
7193                    let mut e = o.expr.clone();
7194                    rewrite_window_to_columns(&mut e, &window_nodes);
7195                    let key = eval::eval_expr(&e, row, &ext_ctx)?;
7196                    keys.push(value_to_order_key(&key)?);
7197                }
7198                keys
7199            };
7200            tagged.push((order_keys, Row::new(values)));
7201        }
7202        // ORDER BY + LIMIT/OFFSET on the projected rows.
7203        if !stmt.order_by.is_empty() {
7204            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
7205            sort_by_keys(&mut tagged, &descs);
7206        }
7207        let mut out_rows: Vec<Row> = tagged.into_iter().map(|(_, r)| r).collect();
7208        apply_offset_and_limit(&mut out_rows, stmt.offset_literal(), stmt.limit_literal());
7209        let final_cols: Vec<ColumnSchema> = projection
7210            .into_iter()
7211            .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
7212            .collect();
7213        Ok(QueryResult::Rows {
7214            columns: final_cols,
7215            rows: out_rows,
7216        })
7217    }
7218
    // v4.11 — this paragraph describes `exec_with_ctes` (defined after
    // the meta-view executor below): materialise each CTE into a temp
    // table inside a cloned catalog, then run the body SELECT against a
    // fresh engine instance that owns the enriched catalog. The clone
    // is moderately expensive — only paid by CTE-bearing queries.
    // Subqueries inside CTE bodies / the main body resolve as
    // usual; the engine's clock is propagated so `NOW()` lines up.
7225    /// v7.16.2 — mailrs round-10 A.3. Materialise the
7226    /// `information_schema.*` / `pg_catalog.*` virtual views
7227    /// the SELECT references, then re-execute the SELECT
7228    /// against an enriched catalog where those views are real
7229    /// tables. Same pattern as `exec_with_ctes`. The temp
7230    /// engine carries `meta_views_materialised = true` so its
7231    /// own meta-dispatch short-circuits — without that we'd
7232    /// infinite-recurse since the temp catalog's view name
7233    /// still starts with `__spg_info_` and re-triggers the
7234    /// check.
7235    fn exec_select_with_meta_views(
7236        &self,
7237        stmt: &SelectStatement,
7238        cancel: CancelToken<'_>,
7239    ) -> Result<QueryResult, EngineError> {
7240        let mut needed: alloc::collections::BTreeSet<String> = alloc::collections::BTreeSet::new();
7241        collect_meta_view_names(stmt, &mut needed);
7242        let mut catalog = self.active_catalog().clone();
7243        for view in &needed {
7244            if catalog.get(view).is_some() {
7245                continue;
7246            }
7247            match view.as_str() {
7248                "__spg_info_columns" => {
7249                    let (schema, rows) = synth_information_schema_columns(self.active_catalog());
7250                    materialise_meta_view(&mut catalog, view, schema, rows)?;
7251                }
7252                "__spg_info_tables" => {
7253                    let (schema, rows) = synth_information_schema_tables(self.active_catalog());
7254                    materialise_meta_view(&mut catalog, view, schema, rows)?;
7255                }
7256                "__spg_pg_class" => {
7257                    let (schema, rows) = synth_pg_class(self.active_catalog());
7258                    materialise_meta_view(&mut catalog, view, schema, rows)?;
7259                }
7260                "__spg_pg_attribute" => {
7261                    let (schema, rows) = synth_pg_attribute(self.active_catalog());
7262                    materialise_meta_view(&mut catalog, view, schema, rows)?;
7263                }
7264                _ => {
7265                    return Err(EngineError::Unsupported(alloc::format!(
7266                        "meta view {view:?} is not yet materialisable; \
7267                         v7.16.2 covers information_schema.columns / .tables \
7268                         and pg_catalog.pg_class / pg_attribute"
7269                    )));
7270                }
7271            }
7272        }
7273        let mut temp = Engine::restore(catalog);
7274        if let Some(c) = self.clock {
7275            temp = temp.with_clock(c);
7276        }
7277        if let Some(f) = self.salt_fn {
7278            temp = temp.with_salt_fn(f);
7279        }
7280        temp.meta_views_materialised = true;
7281        temp.exec_select_cancel(stmt, cancel)
7282    }
7283
7284    fn exec_with_ctes(
7285        &self,
7286        stmt: &SelectStatement,
7287        cancel: CancelToken<'_>,
7288    ) -> Result<QueryResult, EngineError> {
7289        cancel.check()?;
7290        let mut catalog = self.active_catalog().clone();
7291        for cte in &stmt.ctes {
7292            if catalog.get(&cte.name).is_some() {
7293                return Err(EngineError::Unsupported(alloc::format!(
7294                    "CTE name {:?} shadows an existing table; rename the CTE",
7295                    cte.name
7296                )));
7297            }
7298            let (columns, rows) = if cte.recursive {
7299                self.materialise_recursive_cte(cte, &catalog, cancel)?
7300            } else {
7301                let body_result = self.exec_select_cancel(&cte.body, cancel)?;
7302                let QueryResult::Rows { columns, rows } = body_result else {
7303                    return Err(EngineError::Unsupported(alloc::format!(
7304                        "CTE {:?} body did not return rows",
7305                        cte.name
7306                    )));
7307                };
7308                (columns, rows)
7309            };
7310            // v4.22: the projection builder labels any non-column
7311            // expression as Text — including literal SELECT 1.
7312            // Promote each column's type to whatever the rows
7313            // actually carry so the CTE storage table accepts them.
7314            let inferred = infer_column_types(&columns, &rows);
7315            let mut columns = inferred;
7316            // v4.22: apply optional `WITH name(a, b, c)` overrides.
7317            if !cte.column_overrides.is_empty() {
7318                if cte.column_overrides.len() != columns.len() {
7319                    return Err(EngineError::Unsupported(alloc::format!(
7320                        "CTE {:?} column list has {} names but body returns {} columns",
7321                        cte.name,
7322                        cte.column_overrides.len(),
7323                        columns.len()
7324                    )));
7325                }
7326                for (col, name) in columns.iter_mut().zip(cte.column_overrides.iter()) {
7327                    col.name.clone_from(name);
7328                }
7329            }
7330            let schema = TableSchema::new(cte.name.clone(), columns);
7331            catalog.create_table(schema).map_err(EngineError::Storage)?;
7332            let table = catalog
7333                .get_mut(&cte.name)
7334                .expect("just-created CTE table must exist");
7335            for row in rows {
7336                table.insert(row).map_err(EngineError::Storage)?;
7337            }
7338        }
7339        // Strip CTEs from the body before running on the temp engine
7340        // so we don't recurse forever.
7341        let mut body = stmt.clone();
7342        body.ctes = Vec::new();
7343        let mut temp = Engine::restore(catalog);
7344        if let Some(c) = self.clock {
7345            temp = temp.with_clock(c);
7346        }
7347        if let Some(f) = self.salt_fn {
7348            temp = temp.with_salt_fn(f);
7349        }
7350        temp.exec_select_cancel(&body, cancel)
7351    }
7352
7353    /// v4.22: materialise a WITH RECURSIVE CTE. The body must be a
7354    /// UNION (or UNION ALL) of an anchor that does not reference
7355    /// the CTE name, and one or more recursive terms that do. The
7356    /// anchor runs first; each subsequent iteration runs the
7357    /// recursive term against a temp catalog where the CTE name is
7358    /// bound to the *previous* iteration's output. Iteration stops
7359    /// when the recursive term yields no rows; UNION (DISTINCT)
7360    /// deduplicates against the accumulated result, UNION ALL does
7361    /// not. A hard cap on total rows prevents runaway queries.
7362    #[allow(clippy::too_many_lines)]
7363    fn materialise_recursive_cte(
7364        &self,
7365        cte: &spg_sql::ast::Cte,
7366        base_catalog: &Catalog,
7367        cancel: CancelToken<'_>,
7368    ) -> Result<(Vec<ColumnSchema>, Vec<Row>), EngineError> {
7369        const MAX_TOTAL_ROWS: usize = 1_000_000;
7370        const MAX_ITERATIONS: usize = 100_000;
7371        cancel.check()?;
7372        if cte.body.unions.is_empty() {
7373            return Err(EngineError::Unsupported(alloc::format!(
7374                "WITH RECURSIVE {:?} body must be a UNION of an anchor and a recursive term",
7375                cte.name
7376            )));
7377        }
7378        // Anchor: the body's leading SELECT, with unions stripped.
7379        let mut anchor = cte.body.clone();
7380        let union_terms = core::mem::take(&mut anchor.unions);
7381        anchor.ctes = Vec::new();
7382        // Anchor must not reference the CTE name.
7383        if select_refers_to(&anchor, &cte.name) {
7384            return Err(EngineError::Unsupported(alloc::format!(
7385                "WITH RECURSIVE {:?}: the anchor must not reference the CTE itself",
7386                cte.name
7387            )));
7388        }
7389        let anchor_result = self.exec_select_cancel(&anchor, cancel)?;
7390        let QueryResult::Rows {
7391            columns: anchor_cols,
7392            rows: anchor_rows,
7393        } = anchor_result
7394        else {
7395            return Err(EngineError::Unsupported(alloc::format!(
7396                "WITH RECURSIVE {:?}: anchor did not return rows",
7397                cte.name
7398            )));
7399        };
7400        // The projection builder labels non-column expressions Text;
7401        // refine column types from the anchor's actual values so the
7402        // intermediate iter-catalog tables accept them.
7403        let mut columns = infer_column_types(&anchor_cols, &anchor_rows);
7404        if !cte.column_overrides.is_empty() {
7405            if cte.column_overrides.len() != columns.len() {
7406                return Err(EngineError::Unsupported(alloc::format!(
7407                    "CTE {:?} column list has {} names but anchor returns {} columns",
7408                    cte.name,
7409                    cte.column_overrides.len(),
7410                    columns.len()
7411                )));
7412            }
7413            for (col, name) in columns.iter_mut().zip(cte.column_overrides.iter()) {
7414                col.name.clone_from(name);
7415            }
7416        }
7417        let mut all_rows: Vec<Row> = anchor_rows.clone();
7418        let mut working_set: Vec<Row> = anchor_rows;
7419        let mut seen: alloc::collections::BTreeSet<Vec<u8>> = alloc::collections::BTreeSet::new();
7420        // Track at least one "all UNION ALL" flag — if every union
7421        // kind is ALL we skip the dedup step (faster + matches PG).
7422        let all_union_all = union_terms.iter().all(|(k, _)| matches!(k, UnionKind::All));
7423        if !all_union_all {
7424            for r in &all_rows {
7425                seen.insert(encode_row_key(r));
7426            }
7427        }
7428        for iter in 0..MAX_ITERATIONS {
7429            cancel.check()?;
7430            if working_set.is_empty() {
7431                break;
7432            }
7433            // Build a fresh catalog: base + CTE bound to working_set.
7434            let mut iter_catalog = base_catalog.clone();
7435            let schema = TableSchema::new(cte.name.clone(), columns.clone());
7436            iter_catalog
7437                .create_table(schema)
7438                .map_err(EngineError::Storage)?;
7439            {
7440                let table = iter_catalog.get_mut(&cte.name).expect("just-created");
7441                for row in &working_set {
7442                    table.insert(row.clone()).map_err(EngineError::Storage)?;
7443                }
7444            }
7445            let mut iter_engine = Engine::restore(iter_catalog);
7446            if let Some(c) = self.clock {
7447                iter_engine = iter_engine.with_clock(c);
7448            }
7449            if let Some(f) = self.salt_fn {
7450                iter_engine = iter_engine.with_salt_fn(f);
7451            }
7452            // Run each recursive term in sequence and collect new rows.
7453            let mut next_set: Vec<Row> = Vec::new();
7454            for (_, term) in &union_terms {
7455                let mut term = term.clone();
7456                term.ctes = Vec::new();
7457                let r = iter_engine.exec_select_cancel(&term, cancel)?;
7458                let QueryResult::Rows {
7459                    columns: rc,
7460                    rows: rs,
7461                } = r
7462                else {
7463                    return Err(EngineError::Unsupported(alloc::format!(
7464                        "WITH RECURSIVE {:?}: recursive term did not return rows",
7465                        cte.name
7466                    )));
7467                };
7468                if rc.len() != columns.len() {
7469                    return Err(EngineError::Unsupported(alloc::format!(
7470                        "WITH RECURSIVE {:?}: column count of recursive term ({}) does not match anchor ({})",
7471                        cte.name,
7472                        rc.len(),
7473                        columns.len()
7474                    )));
7475                }
7476                for row in rs {
7477                    if !all_union_all {
7478                        let key = encode_row_key(&row);
7479                        if !seen.insert(key) {
7480                            continue;
7481                        }
7482                    }
7483                    next_set.push(row);
7484                }
7485            }
7486            if next_set.is_empty() {
7487                break;
7488            }
7489            all_rows.extend(next_set.iter().cloned());
7490            working_set = next_set;
7491            if all_rows.len() > MAX_TOTAL_ROWS {
7492                return Err(EngineError::Unsupported(alloc::format!(
7493                    "WITH RECURSIVE {:?}: produced more than {MAX_TOTAL_ROWS} rows — likely runaway recursion",
7494                    cte.name
7495                )));
7496            }
7497            if iter + 1 == MAX_ITERATIONS {
7498                return Err(EngineError::Unsupported(alloc::format!(
7499                    "WITH RECURSIVE {:?}: exceeded {MAX_ITERATIONS} iterations",
7500                    cte.name
7501                )));
7502            }
7503        }
7504        Ok((columns, all_rows))
7505    }
7506
7507    fn resolve_select_subqueries(
7508        &self,
7509        stmt: &mut SelectStatement,
7510        cancel: CancelToken<'_>,
7511    ) -> Result<(), EngineError> {
7512        for item in &mut stmt.items {
7513            if let SelectItem::Expr { expr, .. } = item {
7514                self.resolve_expr_subqueries(expr, cancel)?;
7515            }
7516        }
7517        if let Some(w) = &mut stmt.where_ {
7518            self.resolve_expr_subqueries(w, cancel)?;
7519        }
7520        if let Some(gs) = &mut stmt.group_by {
7521            for g in gs {
7522                self.resolve_expr_subqueries(g, cancel)?;
7523            }
7524        }
7525        if let Some(h) = &mut stmt.having {
7526            self.resolve_expr_subqueries(h, cancel)?;
7527        }
7528        for o in &mut stmt.order_by {
7529            self.resolve_expr_subqueries(&mut o.expr, cancel)?;
7530        }
7531        for (_, peer) in &mut stmt.unions {
7532            self.resolve_select_subqueries(peer, cancel)?;
7533        }
7534        Ok(())
7535    }
7536
7537    #[allow(clippy::only_used_in_recursion)] // engine handle reads aren't really pure
7538    fn resolve_expr_subqueries(
7539        &self,
7540        e: &mut Expr,
7541        cancel: CancelToken<'_>,
7542    ) -> Result<(), EngineError> {
7543        // Replace-on-this-node cases first.
7544        if let Some(replacement) = self.subquery_replacement(e, cancel)? {
7545            *e = replacement;
7546            return Ok(());
7547        }
7548        match e {
7549            Expr::Binary { lhs, rhs, .. } => {
7550                self.resolve_expr_subqueries(lhs, cancel)?;
7551                self.resolve_expr_subqueries(rhs, cancel)?;
7552            }
7553            Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
7554                self.resolve_expr_subqueries(expr, cancel)?;
7555            }
7556            Expr::FunctionCall { args, .. } => {
7557                for a in args {
7558                    self.resolve_expr_subqueries(a, cancel)?;
7559                }
7560            }
7561            Expr::Like { expr, pattern, .. } => {
7562                self.resolve_expr_subqueries(expr, cancel)?;
7563                self.resolve_expr_subqueries(pattern, cancel)?;
7564            }
7565            Expr::Extract { source, .. } => self.resolve_expr_subqueries(source, cancel)?,
7566            // v4.12 window functions — recurse into args + ORDER BY
7567            // + PARTITION BY in case they carry inner subqueries.
7568            Expr::WindowFunction {
7569                args,
7570                partition_by,
7571                order_by,
7572                ..
7573            } => {
7574                for a in args {
7575                    self.resolve_expr_subqueries(a, cancel)?;
7576                }
7577                for p in partition_by {
7578                    self.resolve_expr_subqueries(p, cancel)?;
7579                }
7580                for (e, _) in order_by {
7581                    self.resolve_expr_subqueries(e, cancel)?;
7582                }
7583            }
7584            // Subquery nodes are handled in subquery_replacement
7585            // (which returned None — defensive no-op); Literal /
7586            // Column are leaves.
7587            Expr::ScalarSubquery(_)
7588            | Expr::Exists { .. }
7589            | Expr::InSubquery { .. }
7590            | Expr::Literal(_)
7591            | Expr::Placeholder(_)
7592            | Expr::Column(_) => {}
7593            // v7.10.10 — recurse children.
7594            Expr::Array(items) => {
7595                for elem in items {
7596                    self.resolve_expr_subqueries(elem, cancel)?;
7597                }
7598            }
7599            Expr::ArraySubscript { target, index } => {
7600                self.resolve_expr_subqueries(target, cancel)?;
7601                self.resolve_expr_subqueries(index, cancel)?;
7602            }
7603            Expr::AnyAll { expr, array, .. } => {
7604                self.resolve_expr_subqueries(expr, cancel)?;
7605                self.resolve_expr_subqueries(array, cancel)?;
7606            }
7607            Expr::Case {
7608                operand,
7609                branches,
7610                else_branch,
7611            } => {
7612                if let Some(o) = operand {
7613                    self.resolve_expr_subqueries(o, cancel)?;
7614                }
7615                for (w, t) in branches {
7616                    self.resolve_expr_subqueries(w, cancel)?;
7617                    self.resolve_expr_subqueries(t, cancel)?;
7618                }
7619                if let Some(e) = else_branch {
7620                    self.resolve_expr_subqueries(e, cancel)?;
7621                }
7622            }
7623        }
7624        Ok(())
7625    }
7626
7627    /// v4.23: per-row eval that handles correlated subqueries.
7628    /// Equivalent to `eval::eval_expr` when the expression has no
7629    /// subqueries; otherwise clones the expression, substitutes
7630    /// outer-row columns into each surviving subquery node, runs
7631    /// the inner SELECT, and replaces the node with the literal
7632    /// result. Only the WHERE-filter call sites use this path so
7633    /// the uncorrelated fast path is preserved everywhere else.
7634    fn eval_expr_with_correlated(
7635        &self,
7636        expr: &Expr,
7637        row: &Row,
7638        ctx: &EvalContext<'_>,
7639        cancel: CancelToken<'_>,
7640        memo: Option<&mut memoize::MemoizeCache>,
7641    ) -> Result<Value, EngineError> {
7642        if !expr_has_subquery(expr) {
7643            return eval::eval_expr(expr, row, ctx).map_err(EngineError::Eval);
7644        }
7645        let mut e = expr.clone();
7646        self.resolve_correlated_in_expr(&mut e, row, ctx, cancel, memo)?;
7647        eval::eval_expr(&e, row, ctx).map_err(EngineError::Eval)
7648    }
7649
7650    fn resolve_correlated_in_expr(
7651        &self,
7652        e: &mut Expr,
7653        row: &Row,
7654        ctx: &EvalContext<'_>,
7655        cancel: CancelToken<'_>,
7656        mut memo: Option<&mut memoize::MemoizeCache>,
7657    ) -> Result<(), EngineError> {
7658        match e {
7659            Expr::ScalarSubquery(inner) => {
7660                // v6.2.6 — Memoize: build the cache key from the
7661                // pre-substitution subquery repr + the outer row's
7662                // values. Two outer rows with identical correlated
7663                // values hit the same entry.
7664                let cache_key = memo.as_ref().map(|_| memoize::CacheKey {
7665                    subquery_repr: alloc::format!("{}", **inner),
7666                    outer_values: row.values.clone(),
7667                });
7668                if let (Some(cache), Some(k)) = (memo.as_deref_mut(), cache_key.as_ref())
7669                    && let Some(cached) = cache.get(k)
7670                {
7671                    *e = value_to_literal_expr(cached)?;
7672                    return Ok(());
7673                }
7674                let mut s = (**inner).clone();
7675                substitute_outer_columns(&mut s, row, ctx);
7676                let r = self.exec_select_cancel(&s, cancel)?;
7677                let QueryResult::Rows { rows, .. } = r else {
7678                    return Err(EngineError::Unsupported(
7679                        "scalar subquery: inner did not return rows".into(),
7680                    ));
7681                };
7682                let value = match rows.as_slice() {
7683                    [] => Value::Null,
7684                    [r0] => r0.values.first().cloned().unwrap_or(Value::Null),
7685                    _ => {
7686                        return Err(EngineError::Unsupported(alloc::format!(
7687                            "scalar subquery returned {} rows; expected 0 or 1",
7688                            rows.len()
7689                        )));
7690                    }
7691                };
7692                if let (Some(cache), Some(k)) = (memo.as_deref_mut(), cache_key) {
7693                    cache.insert(k, value.clone());
7694                }
7695                *e = value_to_literal_expr(value)?;
7696            }
7697            Expr::Exists { subquery, negated } => {
7698                let mut s = (**subquery).clone();
7699                substitute_outer_columns(&mut s, row, ctx);
7700                let r = self.exec_select_cancel(&s, cancel)?;
7701                let exists = matches!(r, QueryResult::Rows { rows, .. } if !rows.is_empty());
7702                let bit = if *negated { !exists } else { exists };
7703                *e = Expr::Literal(Literal::Bool(bit));
7704            }
7705            Expr::InSubquery {
7706                expr: lhs,
7707                subquery,
7708                negated,
7709            } => {
7710                self.resolve_correlated_in_expr(lhs, row, ctx, cancel, memo.as_deref_mut())?;
7711                let lhs_val = eval::eval_expr(lhs, row, ctx).map_err(EngineError::Eval)?;
7712                let mut s = (**subquery).clone();
7713                substitute_outer_columns(&mut s, row, ctx);
7714                let r = self.exec_select_cancel(&s, cancel)?;
7715                let QueryResult::Rows { columns, rows, .. } = r else {
7716                    return Err(EngineError::Unsupported(
7717                        "IN-subquery: inner did not return rows".into(),
7718                    ));
7719                };
7720                if columns.len() != 1 {
7721                    return Err(EngineError::Unsupported(alloc::format!(
7722                        "IN-subquery must project exactly one column; got {}",
7723                        columns.len()
7724                    )));
7725                }
7726                let mut found = false;
7727                let mut any_null = false;
7728                for r0 in rows {
7729                    let v = r0.values.into_iter().next().unwrap_or(Value::Null);
7730                    if v.is_null() {
7731                        any_null = true;
7732                        continue;
7733                    }
7734                    if value_cmp(&v, &lhs_val) == core::cmp::Ordering::Equal {
7735                        found = true;
7736                        break;
7737                    }
7738                }
7739                let bit = if found {
7740                    !*negated
7741                } else if any_null {
7742                    return Err(EngineError::Unsupported(
7743                        "IN-subquery with NULL in result and no match: NULL semantics not yet implemented".into(),
7744                    ));
7745                } else {
7746                    *negated
7747                };
7748                *e = Expr::Literal(Literal::Bool(bit));
7749            }
7750            Expr::Binary { lhs, rhs, .. } => {
7751                self.resolve_correlated_in_expr(lhs, row, ctx, cancel, memo.as_deref_mut())?;
7752                self.resolve_correlated_in_expr(rhs, row, ctx, cancel, memo.as_deref_mut())?;
7753            }
7754            Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
7755                self.resolve_correlated_in_expr(expr, row, ctx, cancel, memo.as_deref_mut())?;
7756            }
7757            Expr::Like { expr, pattern, .. } => {
7758                self.resolve_correlated_in_expr(expr, row, ctx, cancel, memo.as_deref_mut())?;
7759                self.resolve_correlated_in_expr(pattern, row, ctx, cancel, memo.as_deref_mut())?;
7760            }
7761            Expr::FunctionCall { args, .. } => {
7762                for a in args {
7763                    self.resolve_correlated_in_expr(a, row, ctx, cancel, memo.as_deref_mut())?;
7764                }
7765            }
7766            Expr::Extract { source, .. } => {
7767                self.resolve_correlated_in_expr(source, row, ctx, cancel, memo.as_deref_mut())?;
7768            }
7769            Expr::WindowFunction { .. }
7770            | Expr::Literal(_)
7771            | Expr::Placeholder(_)
7772            | Expr::Column(_) => {}
7773            // v7.10.10 — recurse children.
7774            Expr::Array(items) => {
7775                for elem in items {
7776                    self.resolve_correlated_in_expr(elem, row, ctx, cancel, memo.as_deref_mut())?;
7777                }
7778            }
7779            Expr::ArraySubscript { target, index } => {
7780                self.resolve_correlated_in_expr(target, row, ctx, cancel, memo.as_deref_mut())?;
7781                self.resolve_correlated_in_expr(index, row, ctx, cancel, memo.as_deref_mut())?;
7782            }
7783            Expr::AnyAll { expr, array, .. } => {
7784                self.resolve_correlated_in_expr(expr, row, ctx, cancel, memo.as_deref_mut())?;
7785                self.resolve_correlated_in_expr(array, row, ctx, cancel, memo.as_deref_mut())?;
7786            }
7787            Expr::Case {
7788                operand,
7789                branches,
7790                else_branch,
7791            } => {
7792                if let Some(o) = operand {
7793                    self.resolve_correlated_in_expr(o, row, ctx, cancel, memo.as_deref_mut())?;
7794                }
7795                for (w, t) in branches {
7796                    self.resolve_correlated_in_expr(w, row, ctx, cancel, memo.as_deref_mut())?;
7797                    self.resolve_correlated_in_expr(t, row, ctx, cancel, memo.as_deref_mut())?;
7798                }
7799                if let Some(e) = else_branch {
7800                    self.resolve_correlated_in_expr(e, row, ctx, cancel, memo.as_deref_mut())?;
7801                }
7802            }
7803        }
7804        Ok(())
7805    }
7806
7807    fn subquery_replacement(
7808        &self,
7809        e: &Expr,
7810        cancel: CancelToken<'_>,
7811    ) -> Result<Option<Expr>, EngineError> {
7812        match e {
7813            Expr::ScalarSubquery(inner) => {
7814                let mut s = (**inner).clone();
7815                // Recurse into the inner SELECT first so nested
7816                // subqueries materialise bottom-up.
7817                self.resolve_select_subqueries(&mut s, cancel)?;
7818                let r = match self.exec_bare_select_cancel(&s, cancel) {
7819                    Ok(r) => r,
7820                    Err(e) if is_correlation_error(&e) => return Ok(None),
7821                    Err(e) => return Err(e),
7822                };
7823                let QueryResult::Rows { rows, .. } = r else {
7824                    return Err(EngineError::Unsupported(
7825                        "scalar subquery: inner statement did not return rows".into(),
7826                    ));
7827                };
7828                let value = match rows.as_slice() {
7829                    [] => Value::Null,
7830                    [row] => row.values.first().cloned().unwrap_or(Value::Null),
7831                    _ => {
7832                        return Err(EngineError::Unsupported(alloc::format!(
7833                            "scalar subquery returned {} rows; expected 0 or 1",
7834                            rows.len()
7835                        )));
7836                    }
7837                };
7838                Ok(Some(value_to_literal_expr(value)?))
7839            }
7840            Expr::Exists { subquery, negated } => {
7841                let mut s = (**subquery).clone();
7842                self.resolve_select_subqueries(&mut s, cancel)?;
7843                let r = match self.exec_bare_select_cancel(&s, cancel) {
7844                    Ok(r) => r,
7845                    Err(e) if is_correlation_error(&e) => return Ok(None),
7846                    Err(e) => return Err(e),
7847                };
7848                let exists = match r {
7849                    QueryResult::Rows { rows, .. } => !rows.is_empty(),
7850                    QueryResult::CommandOk { .. } => false,
7851                };
7852                let bit = if *negated { !exists } else { exists };
7853                Ok(Some(Expr::Literal(Literal::Bool(bit))))
7854            }
7855            Expr::InSubquery {
7856                expr,
7857                subquery,
7858                negated,
7859            } => {
7860                let mut s = (**subquery).clone();
7861                self.resolve_select_subqueries(&mut s, cancel)?;
7862                let r = match self.exec_bare_select_cancel(&s, cancel) {
7863                    Ok(r) => r,
7864                    Err(e) if is_correlation_error(&e) => return Ok(None),
7865                    Err(e) => return Err(e),
7866                };
7867                let QueryResult::Rows { columns, rows, .. } = r else {
7868                    return Err(EngineError::Unsupported(
7869                        "IN-subquery: inner statement did not return rows".into(),
7870                    ));
7871                };
7872                if columns.len() != 1 {
7873                    return Err(EngineError::Unsupported(alloc::format!(
7874                        "IN-subquery must project exactly one column; got {}",
7875                        columns.len()
7876                    )));
7877                }
7878                // Build the same OR-Eq chain the parse-time literal-list
7879                // path constructs, with each value lifted into a Literal.
7880                let mut acc: Option<Expr> = None;
7881                for row in rows {
7882                    let v = row.values.into_iter().next().unwrap_or(Value::Null);
7883                    let lit = value_to_literal_expr(v)?;
7884                    let cmp = Expr::Binary {
7885                        lhs: expr.clone(),
7886                        op: BinOp::Eq,
7887                        rhs: Box::new(lit),
7888                    };
7889                    acc = Some(match acc {
7890                        None => cmp,
7891                        Some(prev) => Expr::Binary {
7892                            lhs: Box::new(prev),
7893                            op: BinOp::Or,
7894                            rhs: Box::new(cmp),
7895                        },
7896                    });
7897                }
7898                let combined = acc.unwrap_or(Expr::Literal(Literal::Bool(false)));
7899                let final_expr = if *negated {
7900                    Expr::Unary {
7901                        op: UnOp::Not,
7902                        expr: Box::new(combined),
7903                    }
7904                } else {
7905                    combined
7906                };
7907                Ok(Some(final_expr))
7908            }
7909            _ => Ok(None),
7910        }
7911    }
7912}
7913
7914// ---- v4.12 window-function helpers ----
7915// The (partition-key, order-key, original-index) tuple shape used
7916// across these helpers is intrinsic to the planner. Factoring it
7917// into a typedef adds indirection without making the code clearer,
7918// so several lints are allowed inline on the affected functions
7919// rather than module-wide.
7920
7921/// v4.22: cheap structural scan for `FROM <name>` (qualified or
7922/// not) inside a SELECT — used to verify the anchor of a WITH
7923/// RECURSIVE CTE doesn't recurse into itself. Conservative: walks
7924/// FROM joins, subqueries, and unions.
7925fn select_refers_to(stmt: &SelectStatement, target: &str) -> bool {
7926    if let Some(from) = &stmt.from
7927        && from_refers_to(from, target)
7928    {
7929        return true;
7930    }
7931    for (_, peer) in &stmt.unions {
7932        if select_refers_to(peer, target) {
7933            return true;
7934        }
7935    }
7936    for item in &stmt.items {
7937        if let SelectItem::Expr { expr, .. } = item
7938            && expr_refers_to(expr, target)
7939        {
7940            return true;
7941        }
7942    }
7943    if let Some(w) = &stmt.where_
7944        && expr_refers_to(w, target)
7945    {
7946        return true;
7947    }
7948    false
7949}
7950
7951fn from_refers_to(from: &FromClause, target: &str) -> bool {
7952    if from.primary.name.eq_ignore_ascii_case(target) {
7953        return true;
7954    }
7955    from.joins
7956        .iter()
7957        .any(|j| j.table.name.eq_ignore_ascii_case(target))
7958}
7959
7960fn expr_refers_to(e: &Expr, target: &str) -> bool {
7961    match e {
7962        Expr::ScalarSubquery(s) => select_refers_to(s, target),
7963        Expr::Exists { subquery, .. } | Expr::InSubquery { subquery, .. } => {
7964            select_refers_to(subquery, target)
7965        }
7966        Expr::Binary { lhs, rhs, .. } => expr_refers_to(lhs, target) || expr_refers_to(rhs, target),
7967        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
7968            expr_refers_to(expr, target)
7969        }
7970        Expr::Like { expr, pattern, .. } => {
7971            expr_refers_to(expr, target) || expr_refers_to(pattern, target)
7972        }
7973        Expr::FunctionCall { args, .. } => args.iter().any(|a| expr_refers_to(a, target)),
7974        Expr::Extract { source, .. } => expr_refers_to(source, target),
7975        Expr::WindowFunction {
7976            args,
7977            partition_by,
7978            order_by,
7979            ..
7980        } => {
7981            args.iter().any(|a| expr_refers_to(a, target))
7982                || partition_by.iter().any(|p| expr_refers_to(p, target))
7983                || order_by.iter().any(|(o, _)| expr_refers_to(o, target))
7984        }
7985        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => false,
7986        Expr::Array(items) => items.iter().any(|e| expr_refers_to(e, target)),
7987        Expr::ArraySubscript { target: t, index } => {
7988            expr_refers_to(t, target) || expr_refers_to(index, target)
7989        }
7990        Expr::AnyAll { expr, array, .. } => {
7991            expr_refers_to(expr, target) || expr_refers_to(array, target)
7992        }
7993        Expr::Case {
7994            operand,
7995            branches,
7996            else_branch,
7997        } => {
7998            operand
7999                .as_deref()
8000                .is_some_and(|o| expr_refers_to(o, target))
8001                || branches
8002                    .iter()
8003                    .any(|(w, t)| expr_refers_to(w, target) || expr_refers_to(t, target))
8004                || else_branch
8005                    .as_deref()
8006                    .is_some_and(|e| expr_refers_to(e, target))
8007        }
8008    }
8009}
8010
/// v7.16.2 — map an SPG [`DataType`] to the PG-canonical
/// `information_schema.columns.data_type` text. Covers the
/// values mailrs's migrations probe (`'ARRAY'`, `'integer'`,
/// `'text'`, …).
///
/// All three array types collapse to `"ARRAY"`. Vector columns,
/// and any variant not listed in the match, are reported as
/// `"USER-DEFINED"`, so a `DataType` added later gets a label
/// instead of a panic.
fn pg_data_type_text(ty: DataType) -> alloc::string::String {
    let s = match ty {
        DataType::Int => "integer",
        DataType::BigInt => "bigint",
        DataType::SmallInt => "smallint",
        DataType::Float => "double precision",
        DataType::Bool => "boolean",
        DataType::Text => "text",
        DataType::Varchar(_) => "character varying",
        DataType::Date => "date",
        DataType::Timestamp => "timestamp without time zone",
        DataType::Timestamptz => "timestamp with time zone",
        DataType::Json => "jsonb",
        DataType::Bytes => "bytea",
        DataType::TextArray | DataType::IntArray | DataType::BigIntArray => "ARRAY",
        DataType::TsVector => "tsvector",
        DataType::TsQuery => "tsquery",
        DataType::Vector { .. } => "USER-DEFINED",
        // Non-exhaustive — fall back to "USER-DEFINED" the way
        // PG labels any pg_type it doesn't recognise.
        _ => "USER-DEFINED",
    };
    alloc::string::String::from(s)
}
8045
8046/// v7.16.2 — synthesise `information_schema.columns`. mailrs
8047/// queries are of shape `SELECT 1 FROM information_schema.columns
8048/// WHERE table_name = … AND column_name = … AND data_type = …` —
8049/// the v7.16.2 view returns the columns mailrs probes; broader
8050/// PG-spec parity (ordinal_position, is_nullable, character_
8051/// maximum_length, udt_name, …) lands as needed.
8052fn synth_information_schema_columns(cat: &Catalog) -> (Vec<ColumnSchema>, Vec<Row>) {
8053    let schema = alloc::vec![
8054        ColumnSchema::new("table_catalog", DataType::Text, false),
8055        ColumnSchema::new("table_schema", DataType::Text, false),
8056        ColumnSchema::new("table_name", DataType::Text, false),
8057        ColumnSchema::new("column_name", DataType::Text, false),
8058        ColumnSchema::new("ordinal_position", DataType::Int, false),
8059        ColumnSchema::new("is_nullable", DataType::Text, false),
8060        ColumnSchema::new("data_type", DataType::Text, false),
8061    ];
8062    let mut rows: Vec<Row> = Vec::new();
8063    for tname in cat.table_names() {
8064        let Some(t) = cat.get(&tname) else { continue };
8065        for (i, col) in t.schema().columns.iter().enumerate() {
8066            #[allow(clippy::cast_possible_wrap)]
8067            let ordinal = (i + 1) as i32;
8068            rows.push(Row::new(alloc::vec![
8069                Value::Text("spg".into()),
8070                Value::Text("public".into()),
8071                Value::Text(tname.clone()),
8072                Value::Text(col.name.clone()),
8073                Value::Int(ordinal),
8074                Value::Text(if col.nullable {
8075                    "YES".into()
8076                } else {
8077                    "NO".into()
8078                }),
8079                Value::Text(pg_data_type_text(col.ty)),
8080            ]));
8081        }
8082    }
8083    (schema, rows)
8084}
8085
8086/// v7.16.2 — synthesise `information_schema.tables`.
8087fn synth_information_schema_tables(cat: &Catalog) -> (Vec<ColumnSchema>, Vec<Row>) {
8088    let schema = alloc::vec![
8089        ColumnSchema::new("table_catalog", DataType::Text, false),
8090        ColumnSchema::new("table_schema", DataType::Text, false),
8091        ColumnSchema::new("table_name", DataType::Text, false),
8092        ColumnSchema::new("table_type", DataType::Text, false),
8093    ];
8094    let mut rows: Vec<Row> = Vec::new();
8095    for tname in cat.table_names() {
8096        rows.push(Row::new(alloc::vec![
8097            Value::Text("spg".into()),
8098            Value::Text("public".into()),
8099            Value::Text(tname.clone()),
8100            Value::Text("BASE TABLE".into()),
8101        ]));
8102    }
8103    (schema, rows)
8104}
8105
8106/// v7.16.2 — synthesise `pg_catalog.pg_class`. Minimum shape
8107/// for psql `\d` / ORM probes: `relname` + `relkind`. Each
8108/// user table emits one row.
8109fn synth_pg_class(cat: &Catalog) -> (Vec<ColumnSchema>, Vec<Row>) {
8110    let schema = alloc::vec![
8111        ColumnSchema::new("relname", DataType::Text, false),
8112        ColumnSchema::new("relkind", DataType::Text, false),
8113        ColumnSchema::new("relnamespace", DataType::BigInt, false),
8114    ];
8115    let mut rows: Vec<Row> = Vec::new();
8116    for tname in cat.table_names() {
8117        rows.push(Row::new(alloc::vec![
8118            Value::Text(tname.clone()),
8119            Value::Text("r".into()),
8120            Value::BigInt(2200), // PG's `public` namespace OID
8121        ]));
8122    }
8123    (schema, rows)
8124}
8125
8126/// v7.16.2 — synthesise `pg_catalog.pg_attribute`. Minimum
8127/// shape: `attrelid` (text — SPG has no OID), `attname`,
8128/// `attnum`, `atttypid` (text), `attnotnull`.
8129fn synth_pg_attribute(cat: &Catalog) -> (Vec<ColumnSchema>, Vec<Row>) {
8130    let schema = alloc::vec![
8131        ColumnSchema::new("attrelid", DataType::Text, false),
8132        ColumnSchema::new("attname", DataType::Text, false),
8133        ColumnSchema::new("attnum", DataType::Int, false),
8134        ColumnSchema::new("atttypid", DataType::Text, false),
8135        ColumnSchema::new("attnotnull", DataType::Bool, false),
8136    ];
8137    let mut rows: Vec<Row> = Vec::new();
8138    for tname in cat.table_names() {
8139        let Some(t) = cat.get(&tname) else { continue };
8140        for (i, col) in t.schema().columns.iter().enumerate() {
8141            #[allow(clippy::cast_possible_wrap)]
8142            let ordinal = (i + 1) as i32;
8143            rows.push(Row::new(alloc::vec![
8144                Value::Text(tname.clone()),
8145                Value::Text(col.name.clone()),
8146                Value::Int(ordinal),
8147                Value::Text(pg_data_type_text(col.ty)),
8148                Value::Bool(!col.nullable),
8149            ]));
8150        }
8151    }
8152    (schema, rows)
8153}
8154
8155/// v7.16.2 — drop the synthesised meta view into the enriched
8156/// catalog so the regular FROM-resolution path can see it.
8157fn materialise_meta_view(
8158    catalog: &mut Catalog,
8159    name: &str,
8160    columns: Vec<ColumnSchema>,
8161    rows: Vec<Row>,
8162) -> Result<(), EngineError> {
8163    let schema = TableSchema::new(name.to_string(), columns);
8164    catalog.create_table(schema).map_err(EngineError::Storage)?;
8165    let table = catalog
8166        .get_mut(name)
8167        .expect("just-created meta view must exist");
8168    for row in rows {
8169        table.insert(row).map_err(EngineError::Storage)?;
8170    }
8171    Ok(())
8172}
8173
8174/// v7.16.2 — true when the SELECT statement references any
8175/// `__spg_info_*` or `__spg_pg_*` synthetic table name (the
8176/// parser produces these for `information_schema.X` /
8177/// `pg_catalog.X`). Used by `exec_select_cancel` to short-
8178/// circuit into the meta-view materialisation path.
8179fn select_references_meta_view(stmt: &SelectStatement) -> bool {
8180    fn is_meta(name: &str) -> bool {
8181        name.starts_with("__spg_info_") || name.starts_with("__spg_pg_")
8182    }
8183    if let Some(from) = &stmt.from {
8184        if is_meta(&from.primary.name) {
8185            return true;
8186        }
8187        for j in &from.joins {
8188            if is_meta(&j.table.name) {
8189                return true;
8190            }
8191        }
8192    }
8193    for cte in &stmt.ctes {
8194        if select_references_meta_view(&cte.body) {
8195            return true;
8196        }
8197    }
8198    false
8199}
8200
8201/// v7.16.2 — collect every meta-view name a SELECT touches.
8202/// Returns a deduplicated, sorted list. Caller materialises
8203/// each one into the enriched catalog before re-running the
8204/// SELECT. Walks JOINs, CTEs, and the primary FROM.
8205fn collect_meta_view_names(
8206    stmt: &SelectStatement,
8207    into: &mut alloc::collections::BTreeSet<String>,
8208) {
8209    fn is_meta(name: &str) -> bool {
8210        name.starts_with("__spg_info_") || name.starts_with("__spg_pg_")
8211    }
8212    if let Some(from) = &stmt.from {
8213        if is_meta(&from.primary.name) {
8214            into.insert(from.primary.name.clone());
8215        }
8216        for j in &from.joins {
8217            if is_meta(&j.table.name) {
8218                into.insert(j.table.name.clone());
8219            }
8220        }
8221    }
8222    for cte in &stmt.ctes {
8223        collect_meta_view_names(&cte.body, into);
8224    }
8225}
8226
8227fn infer_column_types(columns: &[ColumnSchema], rows: &[Row]) -> Vec<ColumnSchema> {
8228    let mut out = columns.to_vec();
8229    for (col_idx, col) in out.iter_mut().enumerate() {
8230        if col.ty != DataType::Text {
8231            continue;
8232        }
8233        let mut inferred: Option<DataType> = None;
8234        let mut all_null = true;
8235        for row in rows {
8236            let Some(v) = row.values.get(col_idx) else {
8237                continue;
8238            };
8239            let ty = match v {
8240                Value::Null => continue,
8241                Value::SmallInt(_) => DataType::SmallInt,
8242                Value::Int(_) => DataType::Int,
8243                Value::BigInt(_) => DataType::BigInt,
8244                Value::Float(_) => DataType::Float,
8245                Value::Bool(_) => DataType::Bool,
8246                Value::Vector(_) => DataType::Vector {
8247                    dim: 0,
8248                    encoding: VecEncoding::F32,
8249                },
8250                _ => DataType::Text,
8251            };
8252            all_null = false;
8253            inferred = Some(match inferred {
8254                None => ty,
8255                Some(prev) if prev == ty => prev,
8256                Some(_) => DataType::Text,
8257            });
8258        }
8259        if let Some(t) = inferred {
8260            col.ty = t;
8261            col.nullable = true;
8262        } else if all_null {
8263            col.nullable = true;
8264        }
8265    }
8266    out
8267}
8268
8269/// v4.26: render a human-readable plan tree for `EXPLAIN <select>`.
8270/// Lines are pushed into `out`; `depth` controls indentation. We
8271/// describe the rewritten SELECT — what the executor *would* do —
8272/// using the engine handle to spot indexed lookups and table shapes.
8273#[allow(clippy::too_many_lines, clippy::format_push_string)]
8274/// v6.2.4 — Walk every line of the rendered plan tree and append
8275/// per-operator stats. Lines that name a known operator get
8276/// `(rows=N)` (`actual_rows` of the top-level operator equals the
8277/// final result row count; scans report their catalog row count
8278/// as the rows-considered metric). Other lines — Filter / Join /
8279/// GroupBy / OrderBy etc. — are marked `(—)` so the surface is
8280/// complete-by-construction; v6.2.5 fills these in via inline
8281/// executor counters.
8282/// v6.8.3 — surface "CREATE INDEX …" suggestions for every
8283/// `(table, column)` pair the query touches via WHERE / JOIN
8284/// that doesn't already have an index on the owning table.
8285/// Walks the SELECT's FROM clauses + WHERE expression tree;
8286/// returns one line per missing index. Deterministic order:
8287/// FROM-clause iteration order, then column-reference walk
8288/// order inside each WHERE. Each suggestion is a copy-pastable
8289/// DDL string.
8290fn build_index_suggestions(stmt: &SelectStatement, engine: &Engine) -> Vec<String> {
8291    use alloc::collections::BTreeSet;
8292    let mut seen: BTreeSet<(String, String)> = BTreeSet::new();
8293    let mut out: Vec<String> = Vec::new();
8294    let cat = engine.active_catalog();
8295    // Build a (table, qualifier-or-alias) list from the FROM clause
8296    // so unqualified column refs in WHERE resolve to the correct
8297    // table.
8298    let Some(from) = &stmt.from else {
8299        return out;
8300    };
8301    let mut tables: Vec<String> = Vec::new();
8302    tables.push(from.primary.name.clone());
8303    for j in &from.joins {
8304        tables.push(j.table.name.clone());
8305    }
8306    // Collect column refs from the WHERE expression. JOIN ON
8307    // predicates also feed in.
8308    let mut col_refs: Vec<spg_sql::ast::ColumnName> = Vec::new();
8309    if let Some(w) = &stmt.where_ {
8310        collect_column_refs(w, &mut col_refs);
8311    }
8312    for j in &from.joins {
8313        if let Some(on) = &j.on {
8314            collect_column_refs(on, &mut col_refs);
8315        }
8316    }
8317    for cn in &col_refs {
8318        // Resolve owner table: explicit qualifier first, else
8319        // first table in FROM that has a column of this name.
8320        let owner: Option<String> = if let Some(q) = &cn.qualifier {
8321            tables.iter().find(|t| t == &q).cloned()
8322        } else {
8323            tables.iter().find_map(|t| {
8324                cat.get(t).and_then(|tbl| {
8325                    if tbl.schema().column_position(&cn.name).is_some() {
8326                        Some(t.clone())
8327                    } else {
8328                        None
8329                    }
8330                })
8331            })
8332        };
8333        let Some(owner) = owner else {
8334            continue;
8335        };
8336        let Some(tbl) = cat.get(&owner) else {
8337            continue;
8338        };
8339        let Some(col_pos) = tbl.schema().column_position(&cn.name) else {
8340            continue;
8341        };
8342        // Skip if any BTree index already covers this column as
8343        // its key.
8344        let already_indexed = tbl.indices().iter().any(|i| {
8345            matches!(i.kind, spg_storage::IndexKind::BTree(_))
8346                && i.column_position == col_pos
8347                && i.expression.is_none()
8348                && i.partial_predicate.is_none()
8349        });
8350        if already_indexed {
8351            continue;
8352        }
8353        if seen.insert((owner.clone(), cn.name.clone())) {
8354            out.push(alloc::format!(
8355                "SUGGEST: CREATE INDEX ix_{}_{} ON {} ({})",
8356                owner,
8357                cn.name,
8358                owner,
8359                cn.name
8360            ));
8361        }
8362    }
8363    out
8364}
8365
8366/// Walks an `Expr` and pushes every `ColumnName` it references.
8367/// Order is depth-first, left-to-right.
8368fn collect_column_refs(expr: &Expr, out: &mut Vec<spg_sql::ast::ColumnName>) {
8369    match expr {
8370        Expr::Column(cn) => out.push(cn.clone()),
8371        Expr::FunctionCall { args, .. } => {
8372            for a in args {
8373                collect_column_refs(a, out);
8374            }
8375        }
8376        Expr::Binary { lhs, rhs, .. } => {
8377            collect_column_refs(lhs, out);
8378            collect_column_refs(rhs, out);
8379        }
8380        Expr::Unary { expr: e, .. } => collect_column_refs(e, out),
8381        _ => {}
8382    }
8383}
8384
8385fn annotate_explain_lines(lines: &mut [String], total_rows: usize, engine: &Engine) {
8386    let catalog = engine.active_catalog();
8387    let cold_ids = catalog.cold_segment_ids_global();
8388    let any_cold = !cold_ids.is_empty();
8389    let cold_ids_repr = if any_cold {
8390        let mut s = alloc::string::String::from("[");
8391        for (i, id) in cold_ids.iter().enumerate() {
8392            if i > 0 {
8393                s.push(',');
8394            }
8395            s.push_str(&alloc::format!("{id}"));
8396        }
8397        s.push(']');
8398        s
8399    } else {
8400        alloc::string::String::new()
8401    };
8402    for (idx, line) in lines.iter_mut().enumerate() {
8403        let trimmed = line.trim_start();
8404        let is_top_level = idx == 0;
8405        if is_top_level {
8406            line.push_str(&alloc::format!(" (rows={total_rows})"));
8407            continue;
8408        }
8409        if let Some(rest) = trimmed.strip_prefix("From: ") {
8410            let (name, scan_kind) = match rest.split_once(" [") {
8411                Some((n, k)) => (n.trim(), k.trim_end_matches(']')),
8412                None => (rest.trim(), ""),
8413            };
8414            let bare = name.split_whitespace().next().unwrap_or(name);
8415            let hot = catalog.get(bare).map(|t| t.rows().len());
8416            // v6.2.7 — `cold_segments=[id0,id1,…]` enumerates every
8417            // cold-tier segment the scan COULD have walked. v6.2.x
8418            // can tighten to per-table by walking the table's
8419            // BTree-index cold locators.
8420            let annot = match (hot, scan_kind) {
8421                (Some(h), "full scan") => {
8422                    let mut s = alloc::format!(" (hot_rows={h}");
8423                    if any_cold {
8424                        s.push_str(&alloc::format!(
8425                            ", cold_tier=present, cold_segments={cold_ids_repr}"
8426                        ));
8427                    }
8428                    s.push(')');
8429                    s
8430                }
8431                (Some(h), "index seek") => {
8432                    let mut s = alloc::format!(" (hot_rows≤{h}");
8433                    if any_cold {
8434                        s.push_str(&alloc::format!(
8435                            ", cold_tier=present, cold_segments={cold_ids_repr}"
8436                        ));
8437                    }
8438                    s.push(')');
8439                    s
8440                }
8441                _ => " (rows=—)".to_string(),
8442            };
8443            line.push_str(&annot);
8444            continue;
8445        }
8446        // Filter / GroupBy / Having / OrderBy / Limit / Join etc.
8447        line.push_str(" (rows=—)");
8448    }
8449}
8450
8451fn explain_select(stmt: &SelectStatement, engine: &Engine, depth: usize, out: &mut Vec<String>) {
8452    let pad = "  ".repeat(depth);
8453    // 1) Top-level operator label.
8454    let top = if !stmt.ctes.is_empty() {
8455        if stmt.ctes.iter().any(|c| c.recursive) {
8456            "CTEScan (WITH RECURSIVE)"
8457        } else {
8458            "CTEScan (WITH)"
8459        }
8460    } else if !stmt.unions.is_empty() {
8461        "UnionScan"
8462    } else if select_has_window(stmt) {
8463        "WindowAgg"
8464    } else if aggregate::uses_aggregate(stmt) {
8465        "Aggregate"
8466    } else if stmt.distinct {
8467        "Distinct"
8468    } else if stmt.from.is_some() {
8469        "TableScan"
8470    } else {
8471        "Result"
8472    };
8473    out.push(alloc::format!("{pad}{top}"));
8474    let child = "  ".repeat(depth + 1);
8475    // 2) CTE bodies.
8476    for cte in &stmt.ctes {
8477        let head = if cte.recursive {
8478            alloc::format!("{child}CTE (recursive): {}", cte.name)
8479        } else {
8480            alloc::format!("{child}CTE: {}", cte.name)
8481        };
8482        out.push(head);
8483        explain_select(&cte.body, engine, depth + 2, out);
8484    }
8485    // 3) FROM details — primary table + joins, index hits.
8486    if let Some(from) = &stmt.from {
8487        let mut tag = alloc::format!("{child}From: {}", from.primary.name);
8488        if let Some(alias) = &from.primary.alias {
8489            tag.push_str(&alloc::format!(" AS {alias}"));
8490        }
8491        // Try to detect an index-seek opportunity on WHERE against
8492        // the primary table — same heuristic the executor uses.
8493        if let Some(w) = &stmt.where_
8494            && let Some(table) = engine.active_catalog().get(&from.primary.name)
8495        {
8496            let alias = from.primary.alias.as_deref().unwrap_or(&from.primary.name);
8497            let cols = &table.schema().columns;
8498            if try_index_seek(w, cols, engine.active_catalog(), table, alias).is_some() {
8499                tag.push_str(" [index seek]");
8500            } else {
8501                tag.push_str(" [full scan]");
8502            }
8503        } else {
8504            tag.push_str(" [full scan]");
8505        }
8506        out.push(tag);
8507        for j in &from.joins {
8508            let kind = match j.kind {
8509                spg_sql::ast::JoinKind::Inner => "INNER JOIN",
8510                spg_sql::ast::JoinKind::Left => "LEFT JOIN",
8511                spg_sql::ast::JoinKind::Cross => "CROSS JOIN",
8512            };
8513            let mut s = alloc::format!("{child}{kind}: {}", j.table.name);
8514            if let Some(alias) = &j.table.alias {
8515                s.push_str(&alloc::format!(" AS {alias}"));
8516            }
8517            if j.on.is_some() {
8518                s.push_str(" (ON …)");
8519            }
8520            out.push(s);
8521        }
8522    }
8523    // 4) WHERE / GROUP BY / HAVING / ORDER BY / LIMIT / OFFSET.
8524    if let Some(w) = &stmt.where_ {
8525        let mut s = alloc::format!("{child}Filter: {w}");
8526        if expr_has_subquery(w) {
8527            s.push_str(" [subquery]");
8528        }
8529        out.push(s);
8530    }
8531    if let Some(gs) = &stmt.group_by {
8532        let mut parts = Vec::new();
8533        for g in gs {
8534            parts.push(alloc::format!("{g}"));
8535        }
8536        out.push(alloc::format!("{child}GroupBy: {}", parts.join(", ")));
8537    }
8538    if let Some(h) = &stmt.having {
8539        out.push(alloc::format!("{child}Having: {h}"));
8540    }
8541    for o in &stmt.order_by {
8542        let dir = if o.desc { "DESC" } else { "ASC" };
8543        out.push(alloc::format!("{child}OrderBy: {} {dir}", o.expr));
8544    }
8545    if let Some(lim) = stmt.limit {
8546        out.push(alloc::format!("{child}Limit: {lim}"));
8547    }
8548    if let Some(off) = stmt.offset {
8549        out.push(alloc::format!("{child}Offset: {off}"));
8550    }
8551    // 5) Projection — collapse Wildcard or render N items.
8552    if stmt
8553        .items
8554        .iter()
8555        .any(|it| matches!(it, SelectItem::Wildcard))
8556    {
8557        out.push(alloc::format!("{child}Project: *"));
8558    } else {
8559        out.push(alloc::format!(
8560            "{child}Project: {} item(s)",
8561            stmt.items.len()
8562        ));
8563    }
8564    // 6) Recurse into UNION peers.
8565    for (kind, peer) in &stmt.unions {
8566        let label = match kind {
8567            UnionKind::All => "UNION ALL",
8568            UnionKind::Distinct => "UNION",
8569        };
8570        out.push(alloc::format!("{child}{label}"));
8571        explain_select(peer, engine, depth + 2, out);
8572    }
8573}
8574
8575/// v4.23: recognise the engine errors that indicate the inner
8576/// SELECT couldn't be evaluated in isolation because it references
8577/// an outer column — used by `subquery_replacement` to skip
8578/// materialisation and let row-eval handle it instead.
8579fn is_correlation_error(e: &EngineError) -> bool {
8580    matches!(
8581        e,
8582        EngineError::Eval(
8583            eval::EvalError::ColumnNotFound { .. } | eval::EvalError::UnknownQualifier { .. }
8584        )
8585    )
8586}
8587
8588/// v4.23: walk every Expr in `stmt` and replace each Column ref
8589/// that targets the outer scope (qualifier matches the outer
8590/// table alias) with a Literal carrying the outer row's value.
8591/// Conservative: only qualified refs are substituted, so the user
8592/// must write `outer_alias.col` to reference an outer column. This
8593/// matches PG's lexical scoping for correlated subqueries and
8594/// avoids accidentally rebinding inner columns of the same name.
8595fn substitute_outer_columns(stmt: &mut SelectStatement, row: &Row, ctx: &EvalContext<'_>) {
8596    let Some(outer_alias) = ctx.table_alias else {
8597        return;
8598    };
8599    substitute_in_select(stmt, row, ctx, outer_alias);
8600}
8601
8602fn substitute_in_select(
8603    stmt: &mut SelectStatement,
8604    row: &Row,
8605    ctx: &EvalContext<'_>,
8606    outer_alias: &str,
8607) {
8608    for item in &mut stmt.items {
8609        if let SelectItem::Expr { expr, .. } = item {
8610            substitute_in_expr(expr, row, ctx, outer_alias);
8611        }
8612    }
8613    if let Some(w) = &mut stmt.where_ {
8614        substitute_in_expr(w, row, ctx, outer_alias);
8615    }
8616    if let Some(gs) = &mut stmt.group_by {
8617        for g in gs {
8618            substitute_in_expr(g, row, ctx, outer_alias);
8619        }
8620    }
8621    if let Some(h) = &mut stmt.having {
8622        substitute_in_expr(h, row, ctx, outer_alias);
8623    }
8624    for o in &mut stmt.order_by {
8625        substitute_in_expr(&mut o.expr, row, ctx, outer_alias);
8626    }
8627    for (_, peer) in &mut stmt.unions {
8628        substitute_in_select(peer, row, ctx, outer_alias);
8629    }
8630}
8631
8632fn substitute_in_expr(e: &mut Expr, row: &Row, ctx: &EvalContext<'_>, outer_alias: &str) {
8633    if let Expr::Column(c) = e
8634        && let Some(qual) = &c.qualifier
8635        && qual.eq_ignore_ascii_case(outer_alias)
8636    {
8637        // Look up the column's index in the outer schema.
8638        if let Some(idx) = ctx
8639            .columns
8640            .iter()
8641            .position(|sc| sc.name.eq_ignore_ascii_case(&c.name))
8642        {
8643            let v = row.values.get(idx).cloned().unwrap_or(Value::Null);
8644            if let Ok(lit) = value_to_literal_expr(v) {
8645                *e = lit;
8646                return;
8647            }
8648        }
8649    }
8650    match e {
8651        Expr::Binary { lhs, rhs, .. } => {
8652            substitute_in_expr(lhs, row, ctx, outer_alias);
8653            substitute_in_expr(rhs, row, ctx, outer_alias);
8654        }
8655        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
8656            substitute_in_expr(expr, row, ctx, outer_alias);
8657        }
8658        Expr::Like { expr, pattern, .. } => {
8659            substitute_in_expr(expr, row, ctx, outer_alias);
8660            substitute_in_expr(pattern, row, ctx, outer_alias);
8661        }
8662        Expr::FunctionCall { args, .. } => {
8663            for a in args {
8664                substitute_in_expr(a, row, ctx, outer_alias);
8665            }
8666        }
8667        Expr::Extract { source, .. } => substitute_in_expr(source, row, ctx, outer_alias),
8668        Expr::WindowFunction {
8669            args,
8670            partition_by,
8671            order_by,
8672            ..
8673        } => {
8674            for a in args {
8675                substitute_in_expr(a, row, ctx, outer_alias);
8676            }
8677            for p in partition_by {
8678                substitute_in_expr(p, row, ctx, outer_alias);
8679            }
8680            for (o, _) in order_by {
8681                substitute_in_expr(o, row, ctx, outer_alias);
8682            }
8683        }
8684        Expr::ScalarSubquery(s) => substitute_in_select(s, row, ctx, outer_alias),
8685        Expr::Exists { subquery, .. } | Expr::InSubquery { subquery, .. } => {
8686            substitute_in_select(subquery, row, ctx, outer_alias);
8687        }
8688        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => {}
8689        Expr::Array(items) => {
8690            for elem in items {
8691                substitute_in_expr(elem, row, ctx, outer_alias);
8692            }
8693        }
8694        Expr::ArraySubscript { target, index } => {
8695            substitute_in_expr(target, row, ctx, outer_alias);
8696            substitute_in_expr(index, row, ctx, outer_alias);
8697        }
8698        Expr::AnyAll { expr, array, .. } => {
8699            substitute_in_expr(expr, row, ctx, outer_alias);
8700            substitute_in_expr(array, row, ctx, outer_alias);
8701        }
8702        Expr::Case {
8703            operand,
8704            branches,
8705            else_branch,
8706        } => {
8707            if let Some(o) = operand {
8708                substitute_in_expr(o, row, ctx, outer_alias);
8709            }
8710            for (w, t) in branches {
8711                substitute_in_expr(w, row, ctx, outer_alias);
8712                substitute_in_expr(t, row, ctx, outer_alias);
8713            }
8714            if let Some(e) = else_branch {
8715                substitute_in_expr(e, row, ctx, outer_alias);
8716            }
8717        }
8718    }
8719}
8720
8721/// v4.22: encode a Row to a comparable byte key for UNION-DISTINCT
8722/// dedup inside the recursive iteration. Crude but deterministic
8723/// — Debug prints embed type discriminants so NULL ≠ "" ≠ 0.
8724fn encode_row_key(row: &Row) -> Vec<u8> {
8725    let mut out = Vec::new();
8726    for v in &row.values {
8727        let s = alloc::format!("{v:?}|");
8728        out.extend_from_slice(s.as_bytes());
8729    }
8730    out
8731}
8732
8733fn select_has_window(stmt: &SelectStatement) -> bool {
8734    for item in &stmt.items {
8735        if let SelectItem::Expr { expr, .. } = item
8736            && expr_has_window(expr)
8737        {
8738            return true;
8739        }
8740    }
8741    false
8742}
8743
8744fn expr_has_window(e: &Expr) -> bool {
8745    match e {
8746        Expr::WindowFunction { .. } => true,
8747        Expr::Binary { lhs, rhs, .. } => expr_has_window(lhs) || expr_has_window(rhs),
8748        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
8749            expr_has_window(expr)
8750        }
8751        Expr::FunctionCall { args, .. } => args.iter().any(expr_has_window),
8752        Expr::Like { expr, pattern, .. } => expr_has_window(expr) || expr_has_window(pattern),
8753        Expr::Extract { source, .. } => expr_has_window(source),
8754        Expr::ScalarSubquery(_)
8755        | Expr::Exists { .. }
8756        | Expr::InSubquery { .. }
8757        | Expr::Literal(_)
8758        | Expr::Placeholder(_)
8759        | Expr::Column(_) => false,
8760        Expr::Array(items) => items.iter().any(expr_has_window),
8761        Expr::ArraySubscript { target, index } => expr_has_window(target) || expr_has_window(index),
8762        Expr::AnyAll { expr, array, .. } => expr_has_window(expr) || expr_has_window(array),
8763        Expr::Case {
8764            operand,
8765            branches,
8766            else_branch,
8767        } => {
8768            operand.as_deref().is_some_and(expr_has_window)
8769                || branches
8770                    .iter()
8771                    .any(|(w, t)| expr_has_window(w) || expr_has_window(t))
8772                || else_branch.as_deref().is_some_and(expr_has_window)
8773        }
8774    }
8775}
8776
8777fn collect_window_nodes(e: &Expr, out: &mut Vec<Expr>) {
8778    if let Expr::WindowFunction { .. } = e {
8779        // Deduplicate by structural equality on the expression
8780        // (cheap because window args + partition + order are
8781        // small). Without dedup we'd recompute identical windows
8782        // once per occurrence in the projection.
8783        if !out.iter().any(|x| x == e) {
8784            out.push(e.clone());
8785        }
8786        return;
8787    }
8788    match e {
8789        // Already handled by the early-return at the top.
8790        Expr::WindowFunction { .. } => unreachable!(),
8791        Expr::Binary { lhs, rhs, .. } => {
8792            collect_window_nodes(lhs, out);
8793            collect_window_nodes(rhs, out);
8794        }
8795        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
8796            collect_window_nodes(expr, out);
8797        }
8798        Expr::FunctionCall { args, .. } => {
8799            for a in args {
8800                collect_window_nodes(a, out);
8801            }
8802        }
8803        Expr::Like { expr, pattern, .. } => {
8804            collect_window_nodes(expr, out);
8805            collect_window_nodes(pattern, out);
8806        }
8807        Expr::Extract { source, .. } => collect_window_nodes(source, out),
8808        _ => {}
8809    }
8810}
8811
8812fn rewrite_window_to_columns(e: &mut Expr, window_nodes: &[Expr]) {
8813    if let Expr::WindowFunction { .. } = e
8814        && let Some(idx) = window_nodes.iter().position(|w| w == e)
8815    {
8816        *e = Expr::Column(spg_sql::ast::ColumnName {
8817            qualifier: None,
8818            name: alloc::format!("__win_{idx}"),
8819        });
8820        return;
8821    }
8822    match e {
8823        Expr::Binary { lhs, rhs, .. } => {
8824            rewrite_window_to_columns(lhs, window_nodes);
8825            rewrite_window_to_columns(rhs, window_nodes);
8826        }
8827        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
8828            rewrite_window_to_columns(expr, window_nodes);
8829        }
8830        Expr::FunctionCall { args, .. } => {
8831            for a in args {
8832                rewrite_window_to_columns(a, window_nodes);
8833            }
8834        }
8835        Expr::Like { expr, pattern, .. } => {
8836            rewrite_window_to_columns(expr, window_nodes);
8837            rewrite_window_to_columns(pattern, window_nodes);
8838        }
8839        Expr::Extract { source, .. } => rewrite_window_to_columns(source, window_nodes),
8840        _ => {}
8841    }
8842}
8843
8844/// Total order over partition-key tuples. NULL sorts as the
8845/// lowest value (matches the `<` partial order's NULL-last
8846/// behaviour with `INFINITY` flipped).
8847fn partition_key_cmp(a: &[Value], b: &[Value]) -> core::cmp::Ordering {
8848    for (x, y) in a.iter().zip(b.iter()) {
8849        let c = value_cmp(x, y);
8850        if c != core::cmp::Ordering::Equal {
8851            return c;
8852        }
8853    }
8854    a.len().cmp(&b.len())
8855}
8856
8857fn order_key_cmp(a: &[(Value, bool)], b: &[(Value, bool)]) -> core::cmp::Ordering {
8858    for ((va, desc), (vb, _)) in a.iter().zip(b.iter()) {
8859        let c = value_cmp(va, vb);
8860        let c = if *desc { c.reverse() } else { c };
8861        if c != core::cmp::Ordering::Equal {
8862            return c;
8863        }
8864    }
8865    a.len().cmp(&b.len())
8866}
8867
8868#[allow(clippy::match_same_arms)] // explicit arms per type document the supported pairs
8869fn value_cmp(a: &Value, b: &Value) -> core::cmp::Ordering {
8870    use core::cmp::Ordering;
8871    match (a, b) {
8872        (Value::Null, Value::Null) => Ordering::Equal,
8873        (Value::Null, _) => Ordering::Less,
8874        (_, Value::Null) => Ordering::Greater,
8875        (Value::Int(x), Value::Int(y)) => x.cmp(y),
8876        (Value::BigInt(x), Value::BigInt(y)) => x.cmp(y),
8877        (Value::SmallInt(x), Value::SmallInt(y)) => x.cmp(y),
8878        (Value::Text(x), Value::Text(y)) => x.cmp(y),
8879        (Value::Bool(x), Value::Bool(y)) => x.cmp(y),
8880        (Value::Float(x), Value::Float(y)) => x.partial_cmp(y).unwrap_or(Ordering::Equal),
8881        (Value::Date(x), Value::Date(y)) => x.cmp(y),
8882        (Value::Timestamp(x), Value::Timestamp(y)) => x.cmp(y),
8883        // Cross-type compare: fall back to the debug rendering —
8884        // same-partition is the goal, exact order is irrelevant.
8885        _ => alloc::format!("{a:?}").cmp(&alloc::format!("{b:?}")),
8886    }
8887}
8888
8889/// Compute the window function's per-row output for one partition.
8890/// `slice` has (partition key, order key, original-row-index)
8891/// tuples already sorted by order key. `filtered_rows` is the
8892/// full row list indexed by original-row-index. `out_vals` is
8893/// the destination, also indexed by original-row-index.
8894#[allow(
8895    clippy::too_many_arguments,
8896    clippy::cast_possible_truncation,
8897    clippy::cast_possible_wrap,
8898    clippy::cast_precision_loss,
8899    clippy::cast_sign_loss,
8900    clippy::doc_markdown,
8901    clippy::too_many_lines,
8902    clippy::type_complexity,
8903    clippy::match_same_arms
8904)]
8905fn compute_window_partition(
8906    name: &str,
8907    args: &[Expr],
8908    ordered: bool,
8909    frame: Option<&WindowFrame>,
8910    null_treatment: spg_sql::ast::NullTreatment,
8911    slice: &[(Vec<Value>, Vec<(Value, bool)>, usize)],
8912    filtered_rows: &[&Row],
8913    ctx: &EvalContext<'_>,
8914    out_vals: &mut [Value],
8915) -> Result<(), EngineError> {
8916    let ignore_nulls = matches!(null_treatment, spg_sql::ast::NullTreatment::Ignore);
8917    let lower = name.to_ascii_lowercase();
8918    match lower.as_str() {
8919        "row_number" => {
8920            for (rank, (_, _, idx)) in slice.iter().enumerate() {
8921                out_vals[*idx] = Value::BigInt((rank + 1) as i64);
8922            }
8923            Ok(())
8924        }
8925        "rank" => {
8926            let mut prev_key: Option<&[(Value, bool)]> = None;
8927            let mut current_rank: i64 = 1;
8928            for (i, (_, okey, idx)) in slice.iter().enumerate() {
8929                if let Some(p) = prev_key
8930                    && order_key_cmp(p, okey) != core::cmp::Ordering::Equal
8931                {
8932                    current_rank = (i + 1) as i64;
8933                }
8934                if prev_key.is_none() {
8935                    current_rank = 1;
8936                }
8937                out_vals[*idx] = Value::BigInt(current_rank);
8938                prev_key = Some(okey.as_slice());
8939            }
8940            Ok(())
8941        }
8942        "dense_rank" => {
8943            let mut prev_key: Option<&[(Value, bool)]> = None;
8944            let mut current_rank: i64 = 0;
8945            for (_, okey, idx) in slice {
8946                if prev_key.is_none_or(|p| order_key_cmp(p, okey) != core::cmp::Ordering::Equal) {
8947                    current_rank += 1;
8948                }
8949                out_vals[*idx] = Value::BigInt(current_rank);
8950                prev_key = Some(okey.as_slice());
8951            }
8952            Ok(())
8953        }
8954        "sum" | "avg" | "min" | "max" | "count" | "count_star" => {
8955            // Pre-evaluate the function arg per row in the slice
8956            // (count_star has no arg).
8957            let arg_values: Vec<Value> = if lower == "count_star" || args.is_empty() {
8958                slice.iter().map(|_| Value::Null).collect()
8959            } else {
8960                slice
8961                    .iter()
8962                    .map(|(_, _, idx)| eval::eval_expr(&args[0], filtered_rows[*idx], ctx))
8963                    .collect::<Result<_, _>>()
8964                    .map_err(EngineError::Eval)?
8965            };
8966            // v4.20: pick the effective frame. Explicit frame
8967            // overrides the implicit default (running for ordered,
8968            // whole-partition for unordered).
8969            let eff = effective_frame(frame, ordered)?;
8970            #[allow(clippy::needless_range_loop)]
8971            for i in 0..slice.len() {
8972                let (lo, hi) = frame_bounds_for_row(&eff, i, slice);
8973                let mut sum: f64 = 0.0;
8974                let mut count: i64 = 0;
8975                let mut min_v: Option<f64> = None;
8976                let mut max_v: Option<f64> = None;
8977                let mut row_count: i64 = 0;
8978                if lo <= hi {
8979                    for j in lo..=hi {
8980                        let v = &arg_values[j];
8981                        match lower.as_str() {
8982                            "count_star" => row_count += 1,
8983                            "count" => {
8984                                if !v.is_null() {
8985                                    count += 1;
8986                                }
8987                            }
8988                            _ => {
8989                                if let Some(x) = value_to_f64(v) {
8990                                    sum += x;
8991                                    count += 1;
8992                                    min_v = Some(min_v.map_or(x, |m| m.min(x)));
8993                                    max_v = Some(max_v.map_or(x, |m| m.max(x)));
8994                                }
8995                            }
8996                        }
8997                    }
8998                }
8999                let value = match lower.as_str() {
9000                    "count_star" => Value::BigInt(row_count),
9001                    "count" => Value::BigInt(count),
9002                    "sum" => Value::Float(sum),
9003                    "avg" => {
9004                        if count == 0 {
9005                            Value::Null
9006                        } else {
9007                            Value::Float(sum / count as f64)
9008                        }
9009                    }
9010                    "min" => min_v.map_or(Value::Null, Value::Float),
9011                    "max" => max_v.map_or(Value::Null, Value::Float),
9012                    _ => unreachable!(),
9013                };
9014                let (_, _, idx) = &slice[i];
9015                out_vals[*idx] = value;
9016            }
9017            Ok(())
9018        }
9019        "lag" | "lead" => {
9020            // lag(expr [, offset [, default]])
9021            // lead(expr [, offset [, default]])
9022            if args.is_empty() {
9023                return Err(EngineError::Unsupported(alloc::format!(
9024                    "{lower}() requires at least one argument"
9025                )));
9026            }
9027            let offset: i64 = if args.len() >= 2 {
9028                let v = eval::eval_expr(&args[1], filtered_rows[slice[0].2], ctx)
9029                    .map_err(EngineError::Eval)?;
9030                match v {
9031                    Value::SmallInt(n) => i64::from(n),
9032                    Value::Int(n) => i64::from(n),
9033                    Value::BigInt(n) => n,
9034                    _ => {
9035                        return Err(EngineError::Unsupported(alloc::format!(
9036                            "{lower}() offset must be integer"
9037                        )));
9038                    }
9039                }
9040            } else {
9041                1
9042            };
9043            let default: Value = if args.len() >= 3 {
9044                eval::eval_expr(&args[2], filtered_rows[slice[0].2], ctx)
9045                    .map_err(EngineError::Eval)?
9046            } else {
9047                Value::Null
9048            };
9049            let values: Vec<Value> = slice
9050                .iter()
9051                .map(|(_, _, idx)| eval::eval_expr(&args[0], filtered_rows[*idx], ctx))
9052                .collect::<Result<_, _>>()
9053                .map_err(EngineError::Eval)?;
9054            let n = slice.len();
9055            for (i, (_, _, idx)) in slice.iter().enumerate() {
9056                let signed_offset = if lower == "lag" { -offset } else { offset };
9057                let v = if ignore_nulls {
9058                    // v6.4.2 — IGNORE NULLS: walk in the offset direction
9059                    // skipping NULL values; the `offset`-th non-NULL
9060                    // encountered is the result.
9061                    let step: i64 = if signed_offset >= 0 { 1 } else { -1 };
9062                    let needed: i64 = signed_offset.abs();
9063                    if needed == 0 {
9064                        values[i].clone()
9065                    } else {
9066                        let mut j: i64 = i as i64;
9067                        let mut hits: i64 = 0;
9068                        let mut found: Option<Value> = None;
9069                        loop {
9070                            j += step;
9071                            if j < 0 || j >= n as i64 {
9072                                break;
9073                            }
9074                            #[allow(clippy::cast_sign_loss)]
9075                            let v = &values[j as usize];
9076                            if !v.is_null() {
9077                                hits += 1;
9078                                if hits == needed {
9079                                    found = Some(v.clone());
9080                                    break;
9081                                }
9082                            }
9083                        }
9084                        found.unwrap_or_else(|| default.clone())
9085                    }
9086                } else {
9087                    let target_signed = i64::try_from(i).unwrap_or(i64::MAX) + signed_offset;
9088                    if target_signed < 0 || target_signed >= i64::try_from(n).unwrap_or(i64::MAX) {
9089                        default.clone()
9090                    } else {
9091                        #[allow(clippy::cast_sign_loss)]
9092                        {
9093                            values[target_signed as usize].clone()
9094                        }
9095                    }
9096                };
9097                out_vals[*idx] = v;
9098            }
9099            Ok(())
9100        }
9101        "first_value" | "last_value" | "nth_value" => {
9102            if args.is_empty() {
9103                return Err(EngineError::Unsupported(alloc::format!(
9104                    "{lower}() requires at least one argument"
9105                )));
9106            }
9107            let values: Vec<Value> = slice
9108                .iter()
9109                .map(|(_, _, idx)| eval::eval_expr(&args[0], filtered_rows[*idx], ctx))
9110                .collect::<Result<_, _>>()
9111                .map_err(EngineError::Eval)?;
9112            let nth: usize = if lower == "nth_value" {
9113                if args.len() < 2 {
9114                    return Err(EngineError::Unsupported(
9115                        "nth_value() requires (expr, n)".into(),
9116                    ));
9117                }
9118                let v = eval::eval_expr(&args[1], filtered_rows[slice[0].2], ctx)
9119                    .map_err(EngineError::Eval)?;
9120                let raw = match v {
9121                    Value::SmallInt(n) => i64::from(n),
9122                    Value::Int(n) => i64::from(n),
9123                    Value::BigInt(n) => n,
9124                    _ => {
9125                        return Err(EngineError::Unsupported(
9126                            "nth_value() n must be integer".into(),
9127                        ));
9128                    }
9129                };
9130                if raw < 1 {
9131                    return Err(EngineError::Unsupported(
9132                        "nth_value() n must be >= 1".into(),
9133                    ));
9134                }
9135                #[allow(clippy::cast_sign_loss)]
9136                {
9137                    raw as usize
9138                }
9139            } else {
9140                0
9141            };
9142            let eff = effective_frame(frame, ordered)?;
9143            for i in 0..slice.len() {
9144                let (lo, hi) = frame_bounds_for_row(&eff, i, slice);
9145                let (_, _, idx) = &slice[i];
9146                let v = if lo > hi {
9147                    Value::Null
9148                } else if ignore_nulls && matches!(lower.as_str(), "first_value" | "last_value") {
9149                    // v6.4.2 — IGNORE NULLS: skip NULL cells when
9150                    // selecting the boundary value within the frame.
9151                    if lower == "first_value" {
9152                        (lo..=hi)
9153                            .find_map(|j| {
9154                                let v = &values[j];
9155                                (!v.is_null()).then(|| v.clone())
9156                            })
9157                            .unwrap_or(Value::Null)
9158                    } else {
9159                        (lo..=hi)
9160                            .rev()
9161                            .find_map(|j| {
9162                                let v = &values[j];
9163                                (!v.is_null()).then(|| v.clone())
9164                            })
9165                            .unwrap_or(Value::Null)
9166                    }
9167                } else {
9168                    match lower.as_str() {
9169                        "first_value" => values[lo].clone(),
9170                        "last_value" => values[hi].clone(),
9171                        "nth_value" => {
9172                            let pos = lo + nth - 1;
9173                            if pos > hi {
9174                                Value::Null
9175                            } else {
9176                                values[pos].clone()
9177                            }
9178                        }
9179                        _ => unreachable!(),
9180                    }
9181                };
9182                out_vals[*idx] = v;
9183            }
9184            Ok(())
9185        }
9186        "ntile" => {
9187            if args.is_empty() {
9188                return Err(EngineError::Unsupported(
9189                    "ntile(n) requires an integer argument".into(),
9190                ));
9191            }
9192            let v = eval::eval_expr(&args[0], filtered_rows[slice[0].2], ctx)
9193                .map_err(EngineError::Eval)?;
9194            let bucket_count: i64 = match v {
9195                Value::SmallInt(n) => i64::from(n),
9196                Value::Int(n) => i64::from(n),
9197                Value::BigInt(n) => n,
9198                _ => {
9199                    return Err(EngineError::Unsupported(
9200                        "ntile() argument must be integer".into(),
9201                    ));
9202                }
9203            };
9204            if bucket_count < 1 {
9205                return Err(EngineError::Unsupported(
9206                    "ntile() argument must be >= 1".into(),
9207                ));
9208            }
9209            #[allow(clippy::cast_sign_loss)]
9210            let buckets = bucket_count as usize;
9211            let n = slice.len();
9212            // Each bucket gets `base` rows; the first `extras` buckets
9213            // get one extra. PG semantics.
9214            let base = n / buckets;
9215            let extras = n % buckets;
9216            let mut bucket: usize = 1;
9217            let mut remaining_in_bucket = if extras > 0 { base + 1 } else { base };
9218            let mut buckets_with_extra_remaining = extras;
9219            for (_, _, idx) in slice {
9220                if remaining_in_bucket == 0 {
9221                    bucket += 1;
9222                    buckets_with_extra_remaining = buckets_with_extra_remaining.saturating_sub(1);
9223                    remaining_in_bucket = if buckets_with_extra_remaining > 0 {
9224                        base + 1
9225                    } else {
9226                        base
9227                    };
9228                    // Edge: if base==0 and extras==0, all rows fit;
9229                    // shouldn't reach here, but guard anyway.
9230                    if remaining_in_bucket == 0 {
9231                        remaining_in_bucket = 1;
9232                    }
9233                }
9234                out_vals[*idx] = Value::BigInt(i64::try_from(bucket).unwrap_or(i64::MAX));
9235                remaining_in_bucket -= 1;
9236            }
9237            Ok(())
9238        }
9239        "percent_rank" => {
9240            // (rank - 1) / (n - 1) where rank is the standard RANK().
9241            // Single-row partitions get 0.
9242            let n = slice.len();
9243            let mut prev_key: Option<&[(Value, bool)]> = None;
9244            let mut current_rank: i64 = 1;
9245            for (i, (_, okey, idx)) in slice.iter().enumerate() {
9246                if let Some(p) = prev_key
9247                    && order_key_cmp(p, okey) != core::cmp::Ordering::Equal
9248                {
9249                    current_rank = i64::try_from(i + 1).unwrap_or(i64::MAX);
9250                }
9251                if prev_key.is_none() {
9252                    current_rank = 1;
9253                }
9254                #[allow(clippy::cast_precision_loss)]
9255                let pr = if n <= 1 {
9256                    0.0
9257                } else {
9258                    (current_rank - 1) as f64 / (n - 1) as f64
9259                };
9260                out_vals[*idx] = Value::Float(pr);
9261                prev_key = Some(okey.as_slice());
9262            }
9263            Ok(())
9264        }
9265        "cume_dist" => {
9266            // # rows up to and including this row's peer group / n.
9267            let n = slice.len();
9268            // First pass: find peer-group-end rank for each row.
9269            for i in 0..slice.len() {
9270                let peer_end = peer_group_end(slice, i);
9271                #[allow(clippy::cast_precision_loss)]
9272                let cd = (peer_end + 1) as f64 / n as f64;
9273                let (_, _, idx) = &slice[i];
9274                out_vals[*idx] = Value::Float(cd);
9275            }
9276            Ok(())
9277        }
9278        other => Err(EngineError::Unsupported(alloc::format!(
9279            "window function {other:?} not supported (v4.21: row_number/rank/dense_rank/sum/avg/count/min/max/lag/lead/first_value/last_value/nth_value/ntile/percent_rank/cume_dist)"
9280        ))),
9281    }
9282}
9283
9284/// v4.20: resolve the user-provided frame down to a normalised
9285/// `(kind, start, end)`. `None` means default — derive from
9286/// `ordered`: ordered ⇒ RANGE UNBOUNDED PRECEDING AND CURRENT ROW,
9287/// unordered ⇒ ROWS UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING.
9288/// Single-bound shorthand (e.g. `ROWS 5 PRECEDING`) normalises
9289/// end → CURRENT ROW per the PG spec.
9290fn effective_frame(
9291    frame: Option<&WindowFrame>,
9292    ordered: bool,
9293) -> Result<(FrameKind, FrameBound, FrameBound), EngineError> {
9294    match frame {
9295        None => {
9296            if ordered {
9297                Ok((
9298                    FrameKind::Range,
9299                    FrameBound::UnboundedPreceding,
9300                    FrameBound::CurrentRow,
9301                ))
9302            } else {
9303                Ok((
9304                    FrameKind::Rows,
9305                    FrameBound::UnboundedPreceding,
9306                    FrameBound::UnboundedFollowing,
9307                ))
9308            }
9309        }
9310        Some(fr) => {
9311            let end = fr.end.clone().unwrap_or(FrameBound::CurrentRow);
9312            // Reject start > end (a few impossible combinations).
9313            if matches!(fr.start, FrameBound::UnboundedFollowing)
9314                || matches!(end, FrameBound::UnboundedPreceding)
9315            {
9316                return Err(EngineError::Unsupported(alloc::format!(
9317                    "invalid frame: start={:?} end={:?}",
9318                    fr.start,
9319                    end
9320                )));
9321            }
9322            // RANGE OFFSET PRECEDING / FOLLOWING needs value-typed
9323            // arithmetic on the ORDER BY key (e.g. `RANGE BETWEEN
9324            // INTERVAL '1 day' PRECEDING AND CURRENT ROW`). Not
9325            // implemented in v4.20.
9326            if fr.kind == FrameKind::Range
9327                && (matches!(
9328                    fr.start,
9329                    FrameBound::OffsetPreceding(_) | FrameBound::OffsetFollowing(_)
9330                ) || matches!(
9331                    end,
9332                    FrameBound::OffsetPreceding(_) | FrameBound::OffsetFollowing(_)
9333                ))
9334            {
9335                return Err(EngineError::Unsupported(
9336                    "RANGE with explicit offset bounds is not supported (v4.20: only UNBOUNDED / CURRENT ROW for RANGE)".into(),
9337                ));
9338            }
9339            Ok((fr.kind, fr.start.clone(), end))
9340        }
9341    }
9342}
9343
9344/// Compute `(lo, hi)` row-index bounds inside the partition slice
9345/// for the row at position `i`. Inclusive, clamped to
9346/// `[0, slice.len()-1]`. Empty result if `lo > hi`.
9347#[allow(clippy::type_complexity)]
9348fn frame_bounds_for_row(
9349    eff: &(FrameKind, FrameBound, FrameBound),
9350    i: usize,
9351    slice: &[(Vec<Value>, Vec<(Value, bool)>, usize)],
9352) -> (usize, usize) {
9353    let (kind, start, end) = eff;
9354    let n = slice.len();
9355    let last = n.saturating_sub(1);
9356    let (mut lo, mut hi) = match kind {
9357        FrameKind::Rows => {
9358            let lo = match start {
9359                FrameBound::UnboundedPreceding => 0,
9360                FrameBound::OffsetPreceding(k) => {
9361                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
9362                    i.saturating_sub(k)
9363                }
9364                FrameBound::CurrentRow => i,
9365                FrameBound::OffsetFollowing(k) => {
9366                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
9367                    i.saturating_add(k).min(last)
9368                }
9369                FrameBound::UnboundedFollowing => last,
9370            };
9371            let hi = match end {
9372                FrameBound::UnboundedPreceding => 0,
9373                FrameBound::OffsetPreceding(k) => {
9374                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
9375                    i.saturating_sub(k)
9376                }
9377                FrameBound::CurrentRow => i,
9378                FrameBound::OffsetFollowing(k) => {
9379                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
9380                    i.saturating_add(k).min(last)
9381                }
9382                FrameBound::UnboundedFollowing => last,
9383            };
9384            (lo, hi)
9385        }
9386        FrameKind::Range => {
9387            // RANGE bounds are peer-aware. With only UNBOUNDED and
9388            // CURRENT ROW supported (rejected at effective_frame for
9389            // explicit offsets), the start/end map to the
9390            // partition's full extent at the same-order-key peer
9391            // group boundary.
9392            let lo = match start {
9393                FrameBound::UnboundedPreceding => 0,
9394                FrameBound::CurrentRow => peer_group_start(slice, i),
9395                FrameBound::UnboundedFollowing => last,
9396                _ => unreachable!("offset bounds rejected for RANGE"),
9397            };
9398            let hi = match end {
9399                FrameBound::UnboundedPreceding => 0,
9400                FrameBound::CurrentRow => peer_group_end(slice, i),
9401                FrameBound::UnboundedFollowing => last,
9402                _ => unreachable!("offset bounds rejected for RANGE"),
9403            };
9404            (lo, hi)
9405        }
9406    };
9407    if hi >= n {
9408        hi = last;
9409    }
9410    if lo >= n {
9411        lo = last;
9412    }
9413    (lo, hi)
9414}
9415
9416/// Find the inclusive index of the first row with the same ORDER
9417/// BY key as `slice[i]`. Slice is already sorted by partition then
9418/// order, so peers are contiguous.
9419#[allow(clippy::type_complexity)]
9420fn peer_group_start(slice: &[(Vec<Value>, Vec<(Value, bool)>, usize)], i: usize) -> usize {
9421    let key = &slice[i].1;
9422    let mut j = i;
9423    while j > 0 && order_key_cmp(&slice[j - 1].1, key) == core::cmp::Ordering::Equal {
9424        j -= 1;
9425    }
9426    j
9427}
9428
9429/// Find the inclusive index of the last row with the same ORDER
9430/// BY key as `slice[i]`.
9431#[allow(clippy::type_complexity)]
9432fn peer_group_end(slice: &[(Vec<Value>, Vec<(Value, bool)>, usize)], i: usize) -> usize {
9433    let key = &slice[i].1;
9434    let mut j = i;
9435    while j + 1 < slice.len() && order_key_cmp(&slice[j + 1].1, key) == core::cmp::Ordering::Equal {
9436        j += 1;
9437    }
9438    j
9439}
9440
9441fn value_to_f64(v: &Value) -> Option<f64> {
9442    match v {
9443        Value::SmallInt(n) => Some(f64::from(*n)),
9444        Value::Int(n) => Some(f64::from(*n)),
9445        #[allow(clippy::cast_precision_loss)]
9446        Value::BigInt(n) => Some(*n as f64),
9447        Value::Float(x) => Some(*x),
9448        _ => None,
9449    }
9450}
9451
9452/// Quick scan for any subquery-bearing node in a SELECT's WHERE /
9453/// projection / `order_by` — saves cloning the AST when there are
9454/// none (the common case).
9455fn expr_tree_has_subquery(stmt: &SelectStatement) -> bool {
9456    let mut any = false;
9457    for item in &stmt.items {
9458        if let SelectItem::Expr { expr, .. } = item {
9459            any = any || expr_has_subquery(expr);
9460        }
9461    }
9462    if let Some(w) = &stmt.where_ {
9463        any = any || expr_has_subquery(w);
9464    }
9465    if let Some(h) = &stmt.having {
9466        any = any || expr_has_subquery(h);
9467    }
9468    for o in &stmt.order_by {
9469        any = any || expr_has_subquery(&o.expr);
9470    }
9471    for (_, peer) in &stmt.unions {
9472        any = any || expr_tree_has_subquery(peer);
9473    }
9474    any
9475}
9476
9477fn expr_has_subquery(e: &Expr) -> bool {
9478    match e {
9479        Expr::ScalarSubquery(_) | Expr::Exists { .. } | Expr::InSubquery { .. } => true,
9480        Expr::Binary { lhs, rhs, .. } => expr_has_subquery(lhs) || expr_has_subquery(rhs),
9481        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
9482            expr_has_subquery(expr)
9483        }
9484        Expr::FunctionCall { args, .. } => args.iter().any(expr_has_subquery),
9485        Expr::Like { expr, pattern, .. } => expr_has_subquery(expr) || expr_has_subquery(pattern),
9486        Expr::Extract { source, .. } => expr_has_subquery(source),
9487        Expr::WindowFunction {
9488            args,
9489            partition_by,
9490            order_by,
9491            ..
9492        } => {
9493            args.iter().any(expr_has_subquery)
9494                || partition_by.iter().any(expr_has_subquery)
9495                || order_by.iter().any(|(e, _)| expr_has_subquery(e))
9496        }
9497        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => false,
9498        Expr::Array(items) => items.iter().any(expr_has_subquery),
9499        Expr::ArraySubscript { target, index } => {
9500            expr_has_subquery(target) || expr_has_subquery(index)
9501        }
9502        Expr::AnyAll { expr, array, .. } => expr_has_subquery(expr) || expr_has_subquery(array),
9503        Expr::Case {
9504            operand,
9505            branches,
9506            else_branch,
9507        } => {
9508            operand.as_deref().is_some_and(expr_has_subquery)
9509                || branches
9510                    .iter()
9511                    .any(|(w, t)| expr_has_subquery(w) || expr_has_subquery(t))
9512                || else_branch.as_deref().is_some_and(expr_has_subquery)
9513        }
9514    }
9515}
9516
9517/// v4.10 helper: materialise a runtime `Value` back into an AST
9518/// `Expr::Literal` for the subquery-rewrite path. Supports the
9519/// types `Literal` can represent (Integer / Float / Text / Bool /
9520/// Null). Date / Timestamp / Numeric / Vector / Interval / JSON
9521/// would lose precision through Literal and aren't supported in
9522/// uncorrelated-subquery results; they error with a clear hint.
9523fn value_to_literal_expr(v: Value) -> Result<Expr, EngineError> {
9524    let lit = match v {
9525        Value::Null => Literal::Null,
9526        Value::SmallInt(n) => Literal::Integer(i64::from(n)),
9527        Value::Int(n) => Literal::Integer(i64::from(n)),
9528        Value::BigInt(n) => Literal::Integer(n),
9529        Value::Float(x) => Literal::Float(x),
9530        Value::Text(s) | Value::Json(s) => Literal::String(s),
9531        Value::Bool(b) => Literal::Bool(b),
9532        other => {
9533            return Err(EngineError::Unsupported(alloc::format!(
9534                "subquery result type {:?} not yet materialisable; cast to text or integer in the inner SELECT",
9535                other.data_type()
9536            )));
9537        }
9538    };
9539    Ok(Expr::Literal(lit))
9540}
9541
9542/// v7.13.0 — wider helper used by `INSERT … SELECT` (mailrs
9543/// round-5 G4). Covers the most common `Value` variants. Types
9544/// that need lossy textual round-trip (BYTEA, arrays, ts*)
9545/// surface as an Unsupported error so the caller can add a cast
9546/// in the inner SELECT.
9547fn value_to_literal_expr_permissive(v: Value) -> Result<Expr, EngineError> {
9548    let lit = match v {
9549        Value::Null => Literal::Null,
9550        Value::SmallInt(n) => Literal::Integer(i64::from(n)),
9551        Value::Int(n) => Literal::Integer(i64::from(n)),
9552        Value::BigInt(n) => Literal::Integer(n),
9553        Value::Float(x) => Literal::Float(x),
9554        Value::Text(s) | Value::Json(s) => Literal::String(s),
9555        Value::Bool(b) => Literal::Bool(b),
9556        Value::Vector(xs) => Literal::Vector(xs),
9557        // Date / Timestamp / Timestamptz / Numeric round-trip
9558        // through a TEXT literal that `coerce_value` re-parses
9559        // against the target column type.
9560        Value::Date(days) => {
9561            let micros = (i64::from(days)) * 86_400_000_000;
9562            Literal::String(format_timestamp_micros_as_date(micros))
9563        }
9564        Value::Timestamp(us) => Literal::String(format_timestamp_micros(us)),
9565        Value::Numeric { scaled, scale } => Literal::String(format_numeric(scaled, scale)),
9566        other => {
9567            return Err(EngineError::Unsupported(alloc::format!(
9568                "INSERT … SELECT cannot materialise value of type {:?}; \
9569                 add an explicit CAST in the inner SELECT",
9570                other.data_type()
9571            )));
9572        }
9573    };
9574    Ok(Expr::Literal(lit))
9575}
9576
9577fn format_timestamp_micros(us: i64) -> String {
9578    // Same Y/M/D split used by the wire layer; epoch-relative.
9579    let days = us.div_euclid(86_400_000_000);
9580    let intra_day = us.rem_euclid(86_400_000_000);
9581    let date = format_timestamp_micros_as_date(days * 86_400_000_000);
9582    let secs = intra_day / 1_000_000;
9583    let us_rem = intra_day % 1_000_000;
9584    let h = (secs / 3600) % 24;
9585    let m = (secs / 60) % 60;
9586    let s = secs % 60;
9587    if us_rem == 0 {
9588        alloc::format!("{date} {h:02}:{m:02}:{s:02}")
9589    } else {
9590        alloc::format!("{date} {h:02}:{m:02}:{s:02}.{us_rem:06}")
9591    }
9592}
9593
9594fn format_timestamp_micros_as_date(us: i64) -> String {
9595    // Days since 1970-01-01 → calendar Y-M-D via the proleptic
9596    // Gregorian conversion used by spg-engine's date helpers.
9597    let days = us.div_euclid(86_400_000_000);
9598    // 1970-01-01 = JDN 2440588.
9599    let jdn = days + 2_440_588;
9600    let (y, mo, d) = jdn_to_ymd(jdn);
9601    alloc::format!("{y:04}-{mo:02}-{d:02}")
9602}
9603
/// Convert a Julian Day Number to a `(year, month, day)` triple using the
/// Fliegel & Van Flandern (1968) integer algorithm.
///
/// Intended for positive JDNs; the arithmetic relies on truncating
/// integer division throughout.
fn jdn_to_ymd(jdn: i64) -> (i64, u32, u32) {
    // Each step peels one calendar unit off the running remainder; the
    // constants are those of the published algorithm.
    let shifted = jdn + 68_569;
    let quad_centuries = (4 * shifted) / 146_097;
    let within_cycle = shifted - (146_097 * quad_centuries + 3) / 4;
    let year_part = (4000 * (within_cycle + 1)) / 1_461_001;
    let within_year = within_cycle - (1461 * year_part) / 4 + 31;
    let month_part = (80 * within_year) / 2447;
    let day = (within_year - (2447 * month_part) / 80) as u32;
    let year_carry = month_part / 11;
    let month = (month_part + 2 - 12 * year_carry) as u32;
    let year = 100 * (quad_centuries - 49) + year_part + year_carry;
    (year, month, day)
}
9618
/// Render a fixed-point NUMERIC (`scaled` × 10^-`scale`) as decimal text.
///
/// * `scale == 0` prints the integer as-is.
/// * Otherwise the output is `[-]<whole>.<frac>` where `<frac>` is
///   zero-padded on the left to exactly `scale` digits.
///
/// `10^scale` does not fit in `u128` once `scale >= 39`. In that case the
/// divisor necessarily exceeds every `i128` magnitude, so the whole part
/// is 0 and all digits belong to the fraction; this avoids the overflow
/// that an unchecked `pow` would hit.
fn format_numeric(scaled: i128, scale: u8) -> String {
    if scale == 0 {
        return alloc::format!("{scaled}");
    }
    let abs = scaled.unsigned_abs();
    let (whole, frac) = match 10u128.checked_pow(u32::from(scale)) {
        Some(divisor) => (abs / divisor, abs % divisor),
        // Divisor too large for u128, hence larger than `abs`.
        None => (0, abs),
    };
    // The sign is emitted separately so that values in (-1, 0) keep it.
    let sign = if scaled < 0 { "-" } else { "" };
    alloc::format!("{sign}{whole}.{frac:0width$}", width = usize::from(scale))
}
9630
// NOTE(review): the paragraph below describes the prepared-statement
// placeholder-substitution helper, not the column-rename helper that
// follows. It is kept as a plain comment so rustdoc does not attach it
// to the wrong function; move it next to the helper it documents.
//
// v6.1.1 — walk the prepared `Statement` AST and replace every
// `Expr::Placeholder(n)` with `Expr::Literal(value_to_literal(
// params[n-1]))`. The dispatch downstream sees a `Statement`
// indistinguishable from a simple-query parse, so the exec path
// stays unchanged.
//
// Errors fall into one shape: a `$N` that references past the bound
// `params.len()`. Out-of-range happens when the Bind didn't
// supply enough values; pgwire surfaces this as a protocol error
// to the client.
9641/// v7.15.0 — rewrite every (potentially-qualified) column
9642/// identifier matching `old` to `new` in a stored SQL source
9643/// string. Used by `ALTER TABLE … RENAME COLUMN` to patch
9644/// CHECK predicate sources, partial-index predicate sources,
9645/// and runtime DEFAULT expression sources before they get
9646/// re-parsed on the next INSERT/UPDATE.
9647///
9648/// Round-trips through the parser, so the rewritten output is
9649/// the canonical Display form (matches what the engine stores
9650/// for fresh predicates). If the source doesn't parse, surfaces
9651/// the parse error — the invariant that stored predicates are
9652/// in canonical Display form means a parse failure here is a
9653/// real bug, not a user mistake to swallow.
9654fn rewrite_column_in_source(
9655    src: &str,
9656    old: &str,
9657    new: &str,
9658) -> Result<alloc::string::String, EngineError> {
9659    let mut expr = spg_sql::parser::parse_expression(src).map_err(|e| {
9660        EngineError::Unsupported(alloc::format!(
9661            "ALTER TABLE RENAME COLUMN: stored predicate source {src:?} \
9662             failed to parse for rewrite ({e})"
9663        ))
9664    })?;
9665    rewrite_column_in_expr(&mut expr, old, new);
9666    Ok(alloc::format!("{expr}"))
9667}
9668
9669/// v7.15.0 — Expr walker that swaps `Expr::Column { name: old, .. }`
9670/// for `Expr::Column { name: new, .. }`. Qualifier is preserved
9671/// (e.g. `t.old` → `t.new`); a foreign-table qualifier still
9672/// gets rewritten because the AST has no way to tell us this
9673/// predicate is on table T versus table T2 — predicate sources
9674/// in SPG are always scoped to the owning table, so any
9675/// qualifier present is either redundant or wrong.
9676fn rewrite_column_in_expr(e: &mut Expr, old: &str, new: &str) {
9677    match e {
9678        Expr::Column(c) => {
9679            if c.name.eq_ignore_ascii_case(old) {
9680                c.name = new.to_string();
9681            }
9682        }
9683        Expr::Binary { lhs, rhs, .. } => {
9684            rewrite_column_in_expr(lhs, old, new);
9685            rewrite_column_in_expr(rhs, old, new);
9686        }
9687        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
9688            rewrite_column_in_expr(expr, old, new);
9689        }
9690        Expr::FunctionCall { args, .. } => {
9691            for a in args {
9692                rewrite_column_in_expr(a, old, new);
9693            }
9694        }
9695        Expr::Like { expr, pattern, .. } => {
9696            rewrite_column_in_expr(expr, old, new);
9697            rewrite_column_in_expr(pattern, old, new);
9698        }
9699        Expr::Extract { source, .. } => rewrite_column_in_expr(source, old, new),
9700        Expr::WindowFunction {
9701            args,
9702            partition_by,
9703            order_by,
9704            ..
9705        } => {
9706            for a in args {
9707                rewrite_column_in_expr(a, old, new);
9708            }
9709            for p in partition_by {
9710                rewrite_column_in_expr(p, old, new);
9711            }
9712            for (o, _) in order_by {
9713                rewrite_column_in_expr(o, old, new);
9714            }
9715        }
9716        Expr::Array(items) => {
9717            for elem in items {
9718                rewrite_column_in_expr(elem, old, new);
9719            }
9720        }
9721        Expr::ArraySubscript { target, index } => {
9722            rewrite_column_in_expr(target, old, new);
9723            rewrite_column_in_expr(index, old, new);
9724        }
9725        Expr::AnyAll { expr, array, .. } => {
9726            rewrite_column_in_expr(expr, old, new);
9727            rewrite_column_in_expr(array, old, new);
9728        }
9729        Expr::Case {
9730            operand,
9731            branches,
9732            else_branch,
9733        } => {
9734            if let Some(o) = operand {
9735                rewrite_column_in_expr(o, old, new);
9736            }
9737            for (w, t) in branches {
9738                rewrite_column_in_expr(w, old, new);
9739                rewrite_column_in_expr(t, old, new);
9740            }
9741            if let Some(e) = else_branch {
9742                rewrite_column_in_expr(e, old, new);
9743            }
9744        }
9745        // Stored predicate sources never contain subqueries —
9746        // CHECK / partial-index / runtime_default are all scalar.
9747        // If a future feature changes that, recurse here.
9748        Expr::ScalarSubquery(_) | Expr::Exists { .. } | Expr::InSubquery { .. } => {}
9749        Expr::Literal(_) | Expr::Placeholder(_) => {}
9750    }
9751}
9752
9753/// v7.16.0 — walks a parsed statement and replaces every
9754/// `Expr::Placeholder(N)` with the corresponding `params[N-1]`
9755/// re-encoded as an `Expr::Literal`. Used internally by
9756/// `Engine::execute_prepared` AND surfaced for the spg-embedded
9757/// WAL path (which needs the bind-final AST so replay sees a
9758/// simple-query-shaped statement, not a `$1`-shaped one). Errors
9759/// when a placeholder references an index past the params slice.
9760pub fn substitute_placeholders(stmt: &mut Statement, params: &[Value]) -> Result<(), EngineError> {
9761    match stmt {
9762        Statement::Select(s) => substitute_select(s, params)?,
9763        Statement::Insert(ins) => {
9764            for row in &mut ins.rows {
9765                for e in row {
9766                    substitute_expr(e, params)?;
9767                }
9768            }
9769        }
9770        Statement::Update(u) => {
9771            for (_, e) in &mut u.assignments {
9772                substitute_expr(e, params)?;
9773            }
9774            if let Some(w) = &mut u.where_ {
9775                substitute_expr(w, params)?;
9776            }
9777        }
9778        Statement::Delete(d) => {
9779            if let Some(w) = &mut d.where_ {
9780                substitute_expr(w, params)?;
9781            }
9782        }
9783        Statement::Explain(e) => substitute_select(&mut e.inner, params)?,
9784        // Other statements (CREATE / BEGIN / SHOW / …) have no
9785        // expression slots; no walk needed.
9786        _ => {}
9787    }
9788    Ok(())
9789}
9790
9791fn substitute_select(s: &mut SelectStatement, params: &[Value]) -> Result<(), EngineError> {
9792    for item in &mut s.items {
9793        if let SelectItem::Expr { expr, .. } = item {
9794            substitute_expr(expr, params)?;
9795        }
9796    }
9797    if let Some(w) = &mut s.where_ {
9798        substitute_expr(w, params)?;
9799    }
9800    if let Some(gs) = &mut s.group_by {
9801        for g in gs {
9802            substitute_expr(g, params)?;
9803        }
9804    }
9805    if let Some(h) = &mut s.having {
9806        substitute_expr(h, params)?;
9807    }
9808    for o in &mut s.order_by {
9809        substitute_expr(&mut o.expr, params)?;
9810    }
9811    for (_, peer) in &mut s.unions {
9812        substitute_select(peer, params)?;
9813    }
9814    // v7.9.24 — LIMIT $N / OFFSET $N placeholder resolution.
9815    // mailrs H2. After this pass each LIMIT/OFFSET that was a
9816    // Placeholder is rewritten to Literal so the existing
9817    // `LimitExpr::as_literal` path consumes a concrete u32.
9818    if let Some(le) = s.limit {
9819        s.limit = Some(resolve_limit_placeholder(le, params)?);
9820    }
9821    if let Some(le) = s.offset {
9822        s.offset = Some(resolve_limit_placeholder(le, params)?);
9823    }
9824    Ok(())
9825}
9826
9827fn resolve_limit_placeholder(
9828    le: spg_sql::ast::LimitExpr,
9829    params: &[Value],
9830) -> Result<spg_sql::ast::LimitExpr, EngineError> {
9831    use spg_sql::ast::LimitExpr;
9832    match le {
9833        LimitExpr::Literal(_) => Ok(le),
9834        LimitExpr::Placeholder(n) => {
9835            let idx = usize::from(n).saturating_sub(1);
9836            let v = params.get(idx).ok_or_else(|| {
9837                EngineError::Eval(EvalError::PlaceholderOutOfRange {
9838                    n,
9839                    bound: u16::try_from(params.len()).unwrap_or(u16::MAX),
9840                })
9841            })?;
9842            let int = match v {
9843                Value::SmallInt(x) => Some(i64::from(*x)),
9844                Value::Int(x) => Some(i64::from(*x)),
9845                Value::BigInt(x) => Some(*x),
9846                _ => None,
9847            }
9848            .ok_or_else(|| {
9849                EngineError::Unsupported(alloc::format!(
9850                    "LIMIT/OFFSET ${n} bound to non-integer {v:?}"
9851                ))
9852            })?;
9853            if int < 0 {
9854                return Err(EngineError::Unsupported(alloc::format!(
9855                    "LIMIT/OFFSET ${n} bound to negative value {int}"
9856                )));
9857            }
9858            let bounded = u32::try_from(int).map_err(|_| {
9859                EngineError::Unsupported(alloc::format!(
9860                    "LIMIT/OFFSET ${n} value {int} exceeds u32 range"
9861                ))
9862            })?;
9863            Ok(LimitExpr::Literal(bounded))
9864        }
9865    }
9866}
9867
9868fn substitute_expr(e: &mut Expr, params: &[Value]) -> Result<(), EngineError> {
9869    if let Expr::Placeholder(n) = e {
9870        let idx = usize::from(*n).saturating_sub(1);
9871        let v = params.get(idx).ok_or_else(|| {
9872            EngineError::Eval(EvalError::PlaceholderOutOfRange {
9873                n: *n,
9874                bound: u16::try_from(params.len()).unwrap_or(u16::MAX),
9875            })
9876        })?;
9877        *e = Expr::Literal(value_to_literal(v.clone()));
9878        return Ok(());
9879    }
9880    match e {
9881        Expr::Binary { lhs, rhs, .. } => {
9882            substitute_expr(lhs, params)?;
9883            substitute_expr(rhs, params)?;
9884        }
9885        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
9886            substitute_expr(expr, params)?;
9887        }
9888        Expr::FunctionCall { args, .. } => {
9889            for a in args {
9890                substitute_expr(a, params)?;
9891            }
9892        }
9893        Expr::Like { expr, pattern, .. } => {
9894            substitute_expr(expr, params)?;
9895            substitute_expr(pattern, params)?;
9896        }
9897        Expr::Extract { source, .. } => substitute_expr(source, params)?,
9898        Expr::ScalarSubquery(s) => substitute_select(s, params)?,
9899        Expr::Exists { subquery, .. } => substitute_select(subquery, params)?,
9900        Expr::InSubquery { expr, subquery, .. } => {
9901            substitute_expr(expr, params)?;
9902            substitute_select(subquery, params)?;
9903        }
9904        Expr::WindowFunction {
9905            args,
9906            partition_by,
9907            order_by,
9908            ..
9909        } => {
9910            for a in args {
9911                substitute_expr(a, params)?;
9912            }
9913            for p in partition_by {
9914                substitute_expr(p, params)?;
9915            }
9916            for (e, _) in order_by {
9917                substitute_expr(e, params)?;
9918            }
9919        }
9920        Expr::Literal(_) | Expr::Column(_) => {}
9921        // Already handled above.
9922        Expr::Placeholder(_) => unreachable!("Placeholder handled at top of fn"),
9923        Expr::Array(items) => {
9924            for elem in items {
9925                substitute_expr(elem, params)?;
9926            }
9927        }
9928        Expr::ArraySubscript { target, index } => {
9929            substitute_expr(target, params)?;
9930            substitute_expr(index, params)?;
9931        }
9932        Expr::AnyAll { expr, array, .. } => {
9933            substitute_expr(expr, params)?;
9934            substitute_expr(array, params)?;
9935        }
9936        Expr::Case {
9937            operand,
9938            branches,
9939            else_branch,
9940        } => {
9941            if let Some(o) = operand {
9942                substitute_expr(o, params)?;
9943            }
9944            for (w, t) in branches {
9945                substitute_expr(w, params)?;
9946                substitute_expr(t, params)?;
9947            }
9948            if let Some(e) = else_branch {
9949                substitute_expr(e, params)?;
9950            }
9951        }
9952    }
9953    Ok(())
9954}
9955
9956/// v6.1.1 — convert a runtime `Value` into the closest matching
9957/// `Literal` for the substitute walker. Lossless for the simple
9958/// scalars (Int / Float / Text / Bool); Numeric / Date / Timestamp
9959/// / Json / Interval render as their canonical text form so the
9960/// downstream coerce_value can re-parse against the target column
9961/// type. SQ8 / HalfVector cells are NOT expected as bind params;
9962/// pgwire's Bind decodes vector params to the f32 representation
9963/// before they reach this helper.
9964/// v6.2.0 — total ordering on `Value`s used by ANALYZE to sort a
9965/// column's non-NULL sample before histogram building. Cross-type
9966/// pairs (Int vs Float, Date vs Timestamp, …) compare via the
9967/// same widening the eval-side `compare` operator uses; everything
9968/// else (the genuinely-incompatible pairs) falls back to ordering
9969/// by canonical string form so the sort is still total + stable.
9970/// Vector / SQ8 / Half / Json / Numeric / Interval values reach
9971/// here only via the string-fallback path because vector columns
9972/// are filtered out upstream.
9973fn sort_values_for_histogram(a: &Value, b: &Value) -> core::cmp::Ordering {
9974    use core::cmp::Ordering;
9975    match (a, b) {
9976        (Value::SmallInt(a), Value::SmallInt(b)) => a.cmp(b),
9977        (Value::Int(a), Value::Int(b)) => a.cmp(b),
9978        (Value::BigInt(a), Value::BigInt(b)) => a.cmp(b),
9979        (Value::SmallInt(a), Value::Int(b)) => i32::from(*a).cmp(b),
9980        (Value::Int(a), Value::SmallInt(b)) => a.cmp(&i32::from(*b)),
9981        (Value::Int(a), Value::BigInt(b)) => i64::from(*a).cmp(b),
9982        (Value::BigInt(a), Value::Int(b)) => a.cmp(&i64::from(*b)),
9983        (Value::SmallInt(a), Value::BigInt(b)) => i64::from(*a).cmp(b),
9984        (Value::BigInt(a), Value::SmallInt(b)) => a.cmp(&i64::from(*b)),
9985        (Value::Float(a), Value::Float(b)) => a.partial_cmp(b).unwrap_or(Ordering::Equal),
9986        (Value::Text(a), Value::Text(b)) | (Value::Json(a), Value::Json(b)) => a.cmp(b),
9987        (Value::Bool(a), Value::Bool(b)) => a.cmp(b),
9988        (Value::Date(a), Value::Date(b)) => a.cmp(b),
9989        (Value::Timestamp(a), Value::Timestamp(b)) => a.cmp(b),
9990        // Mixed numeric/float — widen to f64 and compare.
9991        (Value::SmallInt(n), Value::Float(x)) => {
9992            (f64::from(*n)).partial_cmp(x).unwrap_or(Ordering::Equal)
9993        }
9994        (Value::Float(x), Value::SmallInt(n)) => {
9995            x.partial_cmp(&f64::from(*n)).unwrap_or(Ordering::Equal)
9996        }
9997        (Value::Int(n), Value::Float(x)) => {
9998            (f64::from(*n)).partial_cmp(x).unwrap_or(Ordering::Equal)
9999        }
10000        (Value::Float(x), Value::Int(n)) => {
10001            x.partial_cmp(&f64::from(*n)).unwrap_or(Ordering::Equal)
10002        }
10003        (Value::BigInt(n), Value::Float(x)) => {
10004            #[allow(clippy::cast_precision_loss)]
10005            let nf = *n as f64;
10006            nf.partial_cmp(x).unwrap_or(Ordering::Equal)
10007        }
10008        (Value::Float(x), Value::BigInt(n)) => {
10009            #[allow(clippy::cast_precision_loss)]
10010            let nf = *n as f64;
10011            x.partial_cmp(&nf).unwrap_or(Ordering::Equal)
10012        }
10013        // Cross-type fallback: lexicographic on canonical form.
10014        // Total + stable so the sort is well-defined.
10015        _ => canonical_value_repr(a).cmp(&canonical_value_repr(b)),
10016    }
10017}
10018
/// v6.2.0 — renders histogram bounds as a `[v0, v1, ...]` string
/// for the `spg_statistic.histogram_bounds` column.
///
/// Elements are joined with `", "`. An element is wrapped in double
/// quotes when emitting it bare would be ambiguous: it is empty,
/// contains `,` `[` `]` `"` or `\`, or starts or ends with
/// whitespace (which would blend into the separator). Inside
/// quotes, `"` and `\` are escaped with a leading backslash.
/// Backslash and edge whitespace are included in the quoting rule
/// so that a reader of the rendered form can recover each element
/// exactly.
///
/// An empty `bounds` slice renders as `[]`.
fn render_histogram_bounds(bounds: &[alloc::string::String]) -> alloc::string::String {
    // Rough guess of 8 bytes per element plus the two brackets; the
    // string grows as needed beyond that.
    let mut out = alloc::string::String::with_capacity(bounds.len() * 8 + 2);
    out.push('[');
    for (i, bound) in bounds.iter().enumerate() {
        if i > 0 {
            out.push_str(", ");
        }
        let needs_quote = bound.is_empty()
            || bound.contains([',', '[', ']', '"', '\\'])
            || bound.starts_with(char::is_whitespace)
            || bound.ends_with(char::is_whitespace);
        if !needs_quote {
            out.push_str(bound);
            continue;
        }
        out.push('"');
        for ch in bound.chars() {
            if matches!(ch, '"' | '\\') {
                out.push('\\');
            }
            out.push(ch);
        }
        out.push('"');
    }
    out.push(']');
    out
}
10049
10050/// v6.2.0 — canonical textual form of a `Value` for histogram
10051/// bound storage. Strings used by ANALYZE for sort + bound output.
10052/// INT / BIGINT → decimal; FLOAT → shortest-round-trip via
10053/// `{:?}`; TEXT pass-through; BOOL → `t` / `f`; DATE / TIMESTAMP →
10054/// the same form `format_date` / `format_timestamp` produce for
10055/// SQL Display. Vector / SQ8 / Half / Json / Numeric / Interval
10056/// reach this only via a non-Vector column (vector columns are
10057/// skipped upstream); they fall back to a Debug-derived form so
10058/// stats still serialise without crashing.
10059pub(crate) fn canonical_value_repr(v: &Value) -> alloc::string::String {
10060    match v {
10061        Value::Null => "NULL".to_string(),
10062        Value::SmallInt(n) => alloc::format!("{n}"),
10063        Value::Int(n) => alloc::format!("{n}"),
10064        Value::BigInt(n) => alloc::format!("{n}"),
10065        Value::Float(x) => alloc::format!("{x:?}"),
10066        Value::Text(s) | Value::Json(s) => s.clone(),
10067        Value::Bool(b) => if *b { "t" } else { "f" }.to_string(),
10068        Value::Date(d) => eval::format_date(*d),
10069        Value::Timestamp(t) => eval::format_timestamp(*t),
10070        Value::Interval { months, micros } => eval::format_interval(*months, *micros),
10071        Value::Numeric { scaled, scale } => eval::format_numeric(*scaled, *scale),
10072        Value::Vector(_) | Value::Sq8Vector(_) | Value::HalfVector(_) => {
10073            // Unreachable in practice (vector columns are filtered
10074            // out before this). Defensive fallback so a future
10075            // vector-stats path doesn't crash.
10076            alloc::format!("{v:?}")
10077        }
10078        // v7.5.0 — Value is #[non_exhaustive] for downstream
10079        // forward-compat. Future variants fall through to Debug
10080        // form here (same shape as the vector fallback above).
10081        _ => alloc::format!("{v:?}"),
10082    }
10083}
10084
/// v6.2.0 — reports whether `_name` is an engine-managed catalog
/// table that a bare `ANALYZE` (no target) should skip.
///
/// Currently a placeholder hook: it returns `false` for every
/// name, so every table is analysed. Publications, subscriptions,
/// users and statistics are held as engine fields rather than
/// catalog tables, which is why there is nothing to exclude yet.
const fn is_internal_table_name(_name: &str) -> bool {
    false
}
10094
10095fn value_to_literal(v: Value) -> Literal {
10096    match v {
10097        Value::Null => Literal::Null,
10098        Value::SmallInt(n) => Literal::Integer(i64::from(n)),
10099        Value::Int(n) => Literal::Integer(i64::from(n)),
10100        Value::BigInt(n) => Literal::Integer(n),
10101        Value::Float(x) => Literal::Float(x),
10102        Value::Text(s) | Value::Json(s) => Literal::String(s),
10103        Value::Bool(b) => Literal::Bool(b),
10104        Value::Vector(v) => Literal::Vector(v),
10105        Value::Numeric { scaled, scale } => Literal::String(eval::format_numeric(scaled, scale)),
10106        Value::Date(d) => Literal::String(eval::format_date(d)),
10107        Value::Timestamp(t) => Literal::String(eval::format_timestamp(t)),
10108        // v7.16.0 — BYTEA round-trip for the spg-sqlx Bind path.
10109        // PG-canonical text rep is `\x` + lowercase hex; the
10110        // engine's coerce_value already accepts that on the
10111        // text → bytea direction.
10112        Value::Bytes(b) => Literal::String(eval::format_bytea_hex(&b)),
10113        // v7.16.0 — array round-trip for the spg-sqlx Bind
10114        // path. Render as PG external form `{a,b,c}`; the
10115        // engine's text → array coerce (just below in
10116        // coerce_value) accepts it on the matching column type.
10117        Value::TextArray(items) => Literal::String(eval::format_text_array(&items)),
10118        Value::IntArray(items) => Literal::String(eval::format_int_array(&items)),
10119        Value::BigIntArray(items) => Literal::String(eval::format_bigint_array(&items)),
10120        Value::Interval { months, micros } => Literal::Interval {
10121            months,
10122            micros,
10123            text: eval::format_interval(months, micros),
10124        },
10125        // SQ8 / halfvec cells dequantise to f32 before reaching the
10126        // substitute walker; pgwire's Bind path handles that.
10127        Value::Sq8Vector(q) => Literal::Vector(spg_storage::quantize::dequantize(&q)),
10128        Value::HalfVector(h) => Literal::Vector(h.to_f32_vec()),
10129        // v7.5.0 — Value is #[non_exhaustive]; future variants
10130        // render as Debug-form String literal until explicit
10131        // mapping is added.
10132        v => Literal::String(alloc::format!("{v:?}")),
10133    }
10134}
10135
10136fn rewrite_clock_calls(stmt: &mut Statement, now_micros: Option<i64>) {
10137    let Some(now) = now_micros else {
10138        return;
10139    };
10140    match stmt {
10141        Statement::Select(s) => rewrite_select_clock(s, now),
10142        Statement::Insert(ins) => {
10143            for row in &mut ins.rows {
10144                for e in row {
10145                    rewrite_expr_clock(e, now);
10146                }
10147            }
10148        }
10149        _ => {}
10150    }
10151}
10152
10153fn rewrite_select_clock(s: &mut SelectStatement, now: i64) {
10154    for item in &mut s.items {
10155        if let SelectItem::Expr { expr, .. } = item {
10156            rewrite_expr_clock(expr, now);
10157        }
10158    }
10159    if let Some(w) = &mut s.where_ {
10160        rewrite_expr_clock(w, now);
10161    }
10162    if let Some(gs) = &mut s.group_by {
10163        for g in gs {
10164            rewrite_expr_clock(g, now);
10165        }
10166    }
10167    if let Some(h) = &mut s.having {
10168        rewrite_expr_clock(h, now);
10169    }
10170    for o in &mut s.order_by {
10171        rewrite_expr_clock(&mut o.expr, now);
10172    }
10173    for (_, peer) in &mut s.unions {
10174        rewrite_select_clock(peer, now);
10175    }
10176}
10177
10178/// v3.0.3 hot path: every recursion lands in exactly one `match` arm.
10179/// Literal / Column-with-qualifier (the dominant cases on a typical
10180/// AST) take a single pattern dispatch and exit. The clock-rewrite
10181/// targets (zero-arg `NOW` / `CURRENT_TIMESTAMP` / `CURRENT_DATE`
10182/// functions, and bare `CURRENT_TIMESTAMP` / `CURRENT_DATE` column
10183/// refs) sit on their own arms with match guards so the fall-through
10184/// to the recursive arms is unambiguous.
10185fn rewrite_expr_clock(e: &mut Expr, now: i64) {
10186    // Fast-path test on the no-recursion shapes first. We can't fold
10187    // them into the big match below because they need to *replace* `e`
10188    // outright; the recursive arms below match on its sub-fields.
10189    if let Some(replacement) = clock_replacement_for(e, now) {
10190        *e = replacement;
10191        return;
10192    }
10193    match e {
10194        Expr::Binary { lhs, rhs, .. } => {
10195            rewrite_expr_clock(lhs, now);
10196            rewrite_expr_clock(rhs, now);
10197        }
10198        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
10199            rewrite_expr_clock(expr, now);
10200        }
10201        Expr::FunctionCall { args, .. } => {
10202            for a in args {
10203                rewrite_expr_clock(a, now);
10204            }
10205        }
10206        Expr::Like { expr, pattern, .. } => {
10207            rewrite_expr_clock(expr, now);
10208            rewrite_expr_clock(pattern, now);
10209        }
10210        Expr::Extract { source, .. } => rewrite_expr_clock(source, now),
10211        // v4.10 subquery nodes — recurse into the inner SELECT's
10212        // expression slots so e.g. SELECT NOW() in a scalar
10213        // subquery picks up the same instant as the outer query.
10214        Expr::ScalarSubquery(s) => rewrite_select_clock(s, now),
10215        Expr::Exists { subquery, .. } => rewrite_select_clock(subquery, now),
10216        Expr::InSubquery { expr, subquery, .. } => {
10217            rewrite_expr_clock(expr, now);
10218            rewrite_select_clock(subquery, now);
10219        }
10220        // v4.12 window functions — args + PARTITION BY + ORDER BY
10221        // may all reference clock literals.
10222        Expr::WindowFunction {
10223            args,
10224            partition_by,
10225            order_by,
10226            ..
10227        } => {
10228            for a in args {
10229                rewrite_expr_clock(a, now);
10230            }
10231            for p in partition_by {
10232                rewrite_expr_clock(p, now);
10233            }
10234            for (e, _) in order_by {
10235                rewrite_expr_clock(e, now);
10236            }
10237        }
10238        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => {}
10239        Expr::Array(items) => {
10240            for elem in items {
10241                rewrite_expr_clock(elem, now);
10242            }
10243        }
10244        Expr::ArraySubscript { target, index } => {
10245            rewrite_expr_clock(target, now);
10246            rewrite_expr_clock(index, now);
10247        }
10248        Expr::AnyAll { expr, array, .. } => {
10249            rewrite_expr_clock(expr, now);
10250            rewrite_expr_clock(array, now);
10251        }
10252        Expr::Case {
10253            operand,
10254            branches,
10255            else_branch,
10256        } => {
10257            if let Some(o) = operand {
10258                rewrite_expr_clock(o, now);
10259            }
10260            for (w, t) in branches {
10261                rewrite_expr_clock(w, now);
10262                rewrite_expr_clock(t, now);
10263            }
10264            if let Some(e) = else_branch {
10265                rewrite_expr_clock(e, now);
10266            }
10267        }
10268    }
10269}
10270
10271/// Returns `Some(Expr)` when `e` is one of the clock-call shapes that
10272/// must be rewritten; otherwise `None` so the caller falls through to
10273/// the recursive walk. Identifies both function-call forms (`NOW()` /
10274/// `CURRENT_TIMESTAMP()` / `CURRENT_DATE()`) and bare-identifier forms
10275/// (`CURRENT_TIMESTAMP` / `CURRENT_DATE` as unqualified column refs,
10276/// which is how PG accepts them without parens).
10277fn clock_replacement_for(e: &Expr, now: i64) -> Option<Expr> {
10278    let (kind, name) = match e {
10279        Expr::FunctionCall { name, args } if args.is_empty() => (ClockSite::Fn, name.as_str()),
10280        Expr::Column(c) if c.qualifier.is_none() => (ClockSite::BareIdent, c.name.as_str()),
10281        _ => return None,
10282    };
10283    // ASCII case-insensitive name match. Limited to the three keywords
10284    // that actually need rewriting.
10285    let matched = match name.len() {
10286        3 if kind == ClockSite::Fn && name.eq_ignore_ascii_case("now") => Some(true),
10287        12 if name.eq_ignore_ascii_case("current_date") => Some(false),
10288        17 if name.eq_ignore_ascii_case("current_timestamp") => Some(true),
10289        _ => None,
10290    };
10291    let is_timestamp = matched?;
10292    let payload = if is_timestamp {
10293        now
10294    } else {
10295        now.div_euclid(86_400_000_000)
10296    };
10297    let target = if is_timestamp {
10298        spg_sql::ast::CastTarget::Timestamp
10299    } else {
10300        spg_sql::ast::CastTarget::Date
10301    };
10302    Some(Expr::Cast {
10303        expr: alloc::boxed::Box::new(Expr::Literal(spg_sql::ast::Literal::Integer(payload))),
10304        target,
10305    })
10306}
10307
/// Syntactic position in which a candidate clock name was found by
/// `clock_replacement_for`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ClockSite {
    /// A zero-argument function call, e.g. `NOW()`.
    Fn,
    /// An unqualified identifier, e.g. `CURRENT_DATE` without parens.
    BareIdent,
}
10313
10314/// `ORDER BY <integer>` references the N-th SELECT item (1-based).
10315/// Swap the integer literal for the matching item's expression so the
10316/// executor doesn't need a special-case branch. Recurses into UNION
10317/// peers because each peer keeps its own SELECT list.
10318/// v6.4.1 — expand `GROUP BY ALL` to every non-aggregate SELECT-list
10319/// item. Mirrors DuckDB / PG 19 semantics. Wildcards (`SELECT * …`)
10320/// are NOT expanded by GROUP BY ALL (PG 19 leaves the wildcard intact
10321/// and groups by whatever explicit non-aggregates remain — none in
10322/// the wildcard-only case, which still works for non-aggregate
10323/// queries).
10324fn expand_group_by_all(s: &mut SelectStatement) {
10325    if !s.group_by_all {
10326        for (_, peer) in &mut s.unions {
10327            expand_group_by_all(peer);
10328        }
10329        return;
10330    }
10331    let mut groups: Vec<Expr> = Vec::new();
10332    for item in &s.items {
10333        if let SelectItem::Expr { expr, .. } = item
10334            && !aggregate::contains_aggregate(expr)
10335        {
10336            groups.push(expr.clone());
10337        }
10338    }
10339    s.group_by = Some(groups);
10340    s.group_by_all = false;
10341    for (_, peer) in &mut s.unions {
10342        expand_group_by_all(peer);
10343    }
10344}
10345
10346fn resolve_order_by_position(s: &mut SelectStatement) {
10347    // v6.4.0 — iterate every ORDER BY key. Position references
10348    // (`ORDER BY 2`) bind to the 1-based projection index;
10349    // identifier references that match a SELECT-list alias bind to
10350    // the projected expression (Step 4 of L3a).
10351    for order in &mut s.order_by {
10352        match &order.expr {
10353            Expr::Literal(Literal::Integer(n)) if *n >= 1 => {
10354                if let Ok(idx_one_based) = usize::try_from(*n) {
10355                    let idx = idx_one_based - 1;
10356                    if idx < s.items.len()
10357                        && let SelectItem::Expr { expr, .. } = &s.items[idx]
10358                    {
10359                        order.expr = expr.clone();
10360                    }
10361                }
10362            }
10363            Expr::Column(c) if c.qualifier.is_none() => {
10364                // Alias-in-ORDER-BY lookup.
10365                for item in &s.items {
10366                    if let SelectItem::Expr {
10367                        expr,
10368                        alias: Some(a),
10369                    } = item
10370                        && a == &c.name
10371                    {
10372                        order.expr = expr.clone();
10373                        break;
10374                    }
10375                }
10376            }
10377            _ => {}
10378        }
10379    }
10380    for (_, peer) in &mut s.unions {
10381        resolve_order_by_position(peer);
10382    }
10383}
10384
10385/// Sort `tagged` by `f64` key, reversing the comparator under DESC.
10386/// Used by the UNION ORDER BY path; per-block paths inline the same
10387/// comparator because they already hold `&OrderBy` directly.
10388/// v3.1.1: partial-sort helper. When `keep` (= offset + limit) is
10389/// strictly less than `tagged.len()`, run `select_nth_unstable_by` to
10390/// partition the prefix in O(n), then sort just that prefix in O(k
10391/// log k). Total O(n + k log k), vs O(n log n) for a full sort. The
10392/// caller decides what `keep` is; passing `None` (no LIMIT) keeps the
10393/// full-sort behaviour.
10394///
10395/// `tagged` holds `(Option<f64>, Row)` (the SELECT path) — `None` keys
10396/// sort last in ascending order, mirroring NULL-sorts-last in SQL.
10397fn partial_sort_tagged(tagged: &mut Vec<(Vec<f64>, Row)>, keep: Option<usize>, descs: &[bool]) {
10398    let cmp = |a: &(Vec<f64>, Row), b: &(Vec<f64>, Row)| cmp_multi_key(&a.0, &b.0, descs);
10399    match keep {
10400        Some(k) if k < tagged.len() && k > 0 => {
10401            let pivot = k - 1;
10402            tagged.select_nth_unstable_by(pivot, cmp);
10403            tagged[..k].sort_by(cmp);
10404            tagged.truncate(k);
10405        }
10406        _ => {
10407            tagged.sort_by(cmp);
10408        }
10409    }
10410}
10411
10412fn sort_by_keys(tagged: &mut [(Vec<f64>, Row)], descs: &[bool]) {
10413    tagged.sort_by(|a, b| cmp_multi_key(&a.0, &b.0, descs));
10414}
10415
/// v6.4.0 — multi-key ORDER BY comparator. Each key's per-key DESC
/// flag is honored independently; a key with no matching entry in
/// `descs` is compared ascending. NULL is encoded as `f64::INFINITY`
/// so it sorts last in ASC and first in DESC (matches PG default).
///
/// Keys are compared pairwise up to the shorter of `a` and `b`; the
/// first non-equal pair decides the result.
///
/// NaN keys are given a fixed place so the comparator is a total
/// order: NaN compares equal to NaN and greater than every other key
/// (including the `INFINITY` NULL sentinel). Treating NaN as "equal to
/// everything" would make the ordering inconsistent, which the slice
/// sort routines do not tolerate.
fn cmp_multi_key(a: &[f64], b: &[f64], descs: &[bool]) -> core::cmp::Ordering {
    use core::cmp::Ordering;
    for (i, (ka, kb)) in a.iter().zip(b.iter()).enumerate() {
        // `partial_cmp` is `None` only when at least one side is NaN;
        // fall back to ordering on NaN-ness (false < true).
        let ord = ka
            .partial_cmp(kb)
            .unwrap_or_else(|| ka.is_nan().cmp(&kb.is_nan()));
        let ord = if descs.get(i).copied().unwrap_or(false) {
            ord.reverse()
        } else {
            ord
        };
        if ord != Ordering::Equal {
            return ord;
        }
    }
    Ordering::Equal
}
10434
10435/// v6.4.0 — eval every ORDER BY expression for a row and pack the
10436/// resulting keys into a `Vec<f64>`. NULL → `f64::INFINITY`.
10437fn build_order_keys(
10438    order_by: &[OrderBy],
10439    row: &Row,
10440    ctx: &EvalContext,
10441) -> Result<Vec<f64>, EngineError> {
10442    let mut keys = Vec::with_capacity(order_by.len());
10443    for o in order_by {
10444        let v = eval::eval_expr(&o.expr, row, ctx)?;
10445        keys.push(value_to_order_key(&v)?);
10446    }
10447    Ok(keys)
10448}
10449
10450/// Drop the first `offset` rows then truncate to `limit`. PG / `MySQL`
10451/// agree: OFFSET applies *after* ORDER BY but *before* LIMIT (so
10452/// `LIMIT 10 OFFSET 5` keeps rows 6..=15).
10453fn apply_offset_and_limit(rows: &mut Vec<Row>, offset: Option<u32>, limit: Option<u32>) {
10454    if let Some(off) = offset {
10455        let off = off as usize;
10456        if off >= rows.len() {
10457            rows.clear();
10458        } else {
10459            rows.drain(..off);
10460        }
10461    }
10462    if let Some(n) = limit {
10463        rows.truncate(n as usize);
10464    }
10465}
10466
10467/// v7.6.1 — resolve a parser-level `ForeignKeyConstraint` (column
10468/// names + parent table name) into the storage-layer shape (column
10469/// indices + same parent table). Validates everything the engine
10470/// needs to know about the FK at CREATE TABLE time:
10471///
10472///   - parent table exists (catalog lookup, unless self-referencing)
10473///   - parent columns exist on the parent table
10474///   - parent column list matches the local arity (defaults to the
10475///     parent's primary index column when omitted)
10476///   - parent columns are covered by a `BTree` UNIQUE-class index
10477///     (SPG's stand-in for `PRIMARY KEY`/`UNIQUE`) — required so
10478///     the v7.6.2 INSERT path can do an O(log n) parent lookup
10479///   - local columns exist on the table being created
10480fn resolve_foreign_key(
10481    local_table_name: &str,
10482    local_cols: &[ColumnSchema],
10483    fk: spg_sql::ast::ForeignKeyConstraint,
10484    catalog: &Catalog,
10485) -> Result<spg_storage::ForeignKeyConstraint, EngineError> {
10486    // Resolve local columns.
10487    let mut local_columns = Vec::with_capacity(fk.columns.len());
10488    for name in &fk.columns {
10489        let pos = local_cols
10490            .iter()
10491            .position(|c| c.name == *name)
10492            .ok_or_else(|| {
10493                EngineError::Unsupported(alloc::format!(
10494                    "FOREIGN KEY references unknown local column {name:?}"
10495                ))
10496            })?;
10497        local_columns.push(pos);
10498    }
10499    // Self-referencing FK: parent table is the one we're creating.
10500    // The parent column resolution uses the local column list since
10501    // the catalog doesn't have this table yet.
10502    let is_self_ref = fk.parent_table == local_table_name;
10503    let (parent_cols_for_lookup, parent_table_str): (&[ColumnSchema], &str) = if is_self_ref {
10504        (local_cols, local_table_name)
10505    } else {
10506        let parent_table = catalog.get(&fk.parent_table).ok_or_else(|| {
10507            EngineError::Storage(StorageError::TableNotFound {
10508                name: fk.parent_table.clone(),
10509            })
10510        })?;
10511        (
10512            parent_table.schema().columns.as_slice(),
10513            fk.parent_table.as_str(),
10514        )
10515    };
10516    // Resolve parent column names → positions. If the FK omitted the
10517    // parent column list, fall back to the parent's primary index
10518    // column (single-column only — composite default is rejected
10519    // because there's no unambiguous "PK" in SPG's index list).
10520    let parent_columns: Vec<usize> = if fk.parent_columns.is_empty() {
10521        if fk.columns.len() != 1 {
10522            return Err(EngineError::Unsupported(
10523                "composite FOREIGN KEY without explicit parent column list is not supported \
10524                 — list the parent columns explicitly"
10525                    .into(),
10526            ));
10527        }
10528        // Find a single BTree index on the parent and use its column.
10529        let pos = pick_pk_index_column(catalog, parent_table_str, is_self_ref, local_cols)
10530            .ok_or_else(|| {
10531                EngineError::Unsupported(alloc::format!(
10532                    "parent table {parent_table_str:?} has no PRIMARY-key / UNIQUE BTree index \
10533                     to default the FOREIGN KEY against"
10534                ))
10535            })?;
10536        alloc::vec![pos]
10537    } else {
10538        let mut out = Vec::with_capacity(fk.parent_columns.len());
10539        for name in &fk.parent_columns {
10540            let pos = parent_cols_for_lookup
10541                .iter()
10542                .position(|c| c.name == *name)
10543                .ok_or_else(|| {
10544                    EngineError::Unsupported(alloc::format!(
10545                        "FOREIGN KEY references unknown parent column \
10546                         {name:?} on table {parent_table_str:?}"
10547                    ))
10548                })?;
10549            out.push(pos);
10550        }
10551        out
10552    };
10553    if parent_columns.len() != local_columns.len() {
10554        return Err(EngineError::Unsupported(alloc::format!(
10555            "FOREIGN KEY arity mismatch: {} local columns vs {} parent columns",
10556            local_columns.len(),
10557            parent_columns.len()
10558        )));
10559    }
10560    // For non-self-referencing FKs, verify the parent column set is
10561    // covered by a BTree index. SPG doesn't have a `PRIMARY KEY`
10562    // declaration; the convention is "the parent column for FK
10563    // purposes must have a BTree index" — which the user creates via
10564    // `CREATE INDEX ... USING btree (col)` (the default). We accept
10565    // any single-column BTree index that covers a parent column;
10566    // composite parent column lists require an index whose `column_position`
10567    // matches the first parent column (multi-column BTree indices
10568    // are not in the v7.x roadmap).
10569    if !is_self_ref {
10570        let parent_table = catalog.get(&fk.parent_table).expect("checked above");
10571        let primary_parent_col = parent_columns[0];
10572        let has_btree = parent_table
10573            .schema()
10574            .columns
10575            .get(primary_parent_col)
10576            .is_some()
10577            && parent_table.indices().iter().any(|idx| {
10578                matches!(idx.kind, spg_storage::IndexKind::BTree(_))
10579                    && idx.column_position == primary_parent_col
10580                    && idx.partial_predicate.is_none()
10581            });
10582        if !has_btree {
10583            return Err(EngineError::Unsupported(alloc::format!(
10584                "FOREIGN KEY parent column on {:?} is not covered by an unconditional BTree \
10585                 index — create one with `CREATE INDEX ... ON {} ({})` first",
10586                parent_table_str,
10587                parent_table_str,
10588                parent_table.schema().columns[primary_parent_col].name,
10589            )));
10590        }
10591    }
10592    let on_delete = fk_action_sql_to_storage(fk.on_delete);
10593    let on_update = fk_action_sql_to_storage(fk.on_update);
10594    Ok(spg_storage::ForeignKeyConstraint {
10595        name: fk.name,
10596        local_columns,
10597        parent_table: fk.parent_table,
10598        parent_columns,
10599        on_delete,
10600        on_update,
10601    })
10602}
10603
10604/// v7.6.1 — pick a sentinel "primary key" column from the parent
10605/// table when the FK didn't name parent columns. Picks the first
10606/// single-column unconditional BTree index — that's the closest
10607/// thing SPG has to a PRIMARY KEY today. Self-referencing FKs use
10608/// `local_cols` as the column source.
10609fn pick_pk_index_column(
10610    catalog: &Catalog,
10611    parent_name: &str,
10612    is_self_ref: bool,
10613    local_cols: &[ColumnSchema],
10614) -> Option<usize> {
10615    if is_self_ref {
10616        // Self-ref FK omitted parent columns: pick column 0 by
10617        // convention (no catalog entry yet). Engine will widen this
10618        // when v7.6.7 lands; v7.6.1 only handles the explicit form.
10619        let _ = local_cols;
10620        return Some(0);
10621    }
10622    let parent = catalog.get(parent_name)?;
10623    parent.indices().iter().find_map(|idx| {
10624        if matches!(idx.kind, spg_storage::IndexKind::BTree(_))
10625            && idx.partial_predicate.is_none()
10626            && idx.included_columns.is_empty()
10627            && idx.expression.is_none()
10628        {
10629            Some(idx.column_position)
10630        } else {
10631            None
10632        }
10633    })
10634}
10635
10636/// v7.9.8 / v7.9.10 — resolve the column positions that
10637/// identify a conflict for ON CONFLICT. Returns a Vec of
10638/// column positions (1 element for single-column form, N for
10639/// composite). When the user wrote bare `ON CONFLICT DO …`,
10640/// falls back to the table's first unconditional BTree index
10641/// (always single-column today).
10642fn resolve_on_conflict_columns(
10643    catalog: &Catalog,
10644    table_name: &str,
10645    target: &[String],
10646) -> Result<Vec<usize>, EngineError> {
10647    let table = catalog.get(table_name).ok_or_else(|| {
10648        EngineError::Storage(StorageError::TableNotFound {
10649            name: table_name.into(),
10650        })
10651    })?;
10652    if target.is_empty() {
10653        // v7.13.2 — mailrs round-6 S5 follow-up. Composite UNIQUE
10654        // constraints carry a multi-column tuple; the prior code
10655        // path picked only the leading column of the first BTree
10656        // index, which caused `ON CONFLICT DO NOTHING` to dedup
10657        // by leading column alone (3 rows with same group_id but
10658        // different permission collapsed to 1). PG semantics use
10659        // the full tuple. Prefer a UniquenessConstraint's full
10660        // column list when one exists; fall back to the leading
10661        // BTree column for legacy single-column UNIQUE.
10662        if let Some(uc) = table.schema().uniqueness_constraints.first() {
10663            return Ok(uc.columns.clone());
10664        }
10665        let pos = table
10666            .indices()
10667            .iter()
10668            .find_map(|idx| {
10669                if matches!(idx.kind, spg_storage::IndexKind::BTree(_))
10670                    && idx.partial_predicate.is_none()
10671                    && idx.included_columns.is_empty()
10672                    && idx.expression.is_none()
10673                {
10674                    Some(idx.column_position)
10675                } else {
10676                    None
10677                }
10678            })
10679            .ok_or_else(|| {
10680                EngineError::Unsupported(alloc::format!(
10681                    "ON CONFLICT without target requires a UNIQUE BTree index on {table_name:?}"
10682                ))
10683            })?;
10684        return Ok(alloc::vec![pos]);
10685    }
10686    let mut out = Vec::with_capacity(target.len());
10687    for name in target {
10688        let pos = table
10689            .schema()
10690            .columns
10691            .iter()
10692            .position(|c| c.name == *name)
10693            .ok_or_else(|| {
10694                EngineError::Unsupported(alloc::format!(
10695                    "ON CONFLICT target column {name:?} not found on {table_name:?}"
10696                ))
10697            })?;
10698        out.push(pos);
10699    }
10700    Ok(out)
10701}
10702
10703/// v7.9.8 — check whether the BTree index on `column_pos` of
10704/// `table_name` already has a row with this key.
10705fn on_conflict_key_exists(
10706    catalog: &Catalog,
10707    table_name: &str,
10708    column_pos: usize,
10709    key: &Value,
10710) -> bool {
10711    let Some(table) = catalog.get(table_name) else {
10712        return false;
10713    };
10714    let Some(idx_key) = spg_storage::IndexKey::from_value(key) else {
10715        return false;
10716    };
10717    table.indices().iter().any(|idx| {
10718        matches!(idx.kind, spg_storage::IndexKind::BTree(_))
10719            && idx.column_position == column_pos
10720            && idx.partial_predicate.is_none()
10721            && !idx.lookup_eq(&idx_key).is_empty()
10722    })
10723}
10724
10725/// v7.9.9 / v7.9.10 — look up an existing row's position by
10726/// matching all `column_positions` against the incoming `key`
10727/// tuple. Single-column shape (one column) reduces to the
10728/// canonical PK lookup; composite shapes scan linearly until
10729/// every position matches.
10730fn lookup_row_position_by_keys(
10731    catalog: &Catalog,
10732    table_name: &str,
10733    column_positions: &[usize],
10734    key: &[&Value],
10735) -> Option<usize> {
10736    let table = catalog.get(table_name)?;
10737    table.rows().iter().position(|r| {
10738        column_positions
10739            .iter()
10740            .enumerate()
10741            .all(|(i, &pos)| r.values.get(pos) == Some(key[i]))
10742    })
10743}
10744
10745/// v7.9.10 — does the table already contain a row whose
10746/// `column_positions` tuple equals `key`? Single-column shape
10747/// uses the existing BTree fast path; composite shapes fall
10748/// back to a row scan.
10749fn on_conflict_keys_exist(
10750    catalog: &Catalog,
10751    table_name: &str,
10752    column_positions: &[usize],
10753    key: &[&Value],
10754) -> bool {
10755    if column_positions.len() == 1 {
10756        return on_conflict_key_exists(catalog, table_name, column_positions[0], key[0]);
10757    }
10758    let Some(table) = catalog.get(table_name) else {
10759        return false;
10760    };
10761    table.rows().iter().any(|r| {
10762        column_positions
10763            .iter()
10764            .enumerate()
10765            .all(|(i, &pos)| r.values.get(pos) == Some(key[i]))
10766    })
10767}
10768
10769/// v7.9.9 — apply ON CONFLICT DO UPDATE SET assignments to an
10770/// existing row.
10771///
10772/// `incoming` is the rejected INSERT row (used to resolve
10773/// `EXCLUDED.col` references in the assignment exprs);
10774/// `target_pos` is the position of the existing row in the table.
10775/// Each assignment substitutes `EXCLUDED.col` with the matching
10776/// incoming value, evaluates the resulting expression against
10777/// the existing row, and writes the new value into the
10778/// corresponding column of the returned `Vec<Value>`. If
10779/// `where_` evaluates falsy, returns Ok(None) — PG behaviour:
10780/// the conflicting row is silently kept unchanged.
10781fn apply_on_conflict_assignments(
10782    catalog: &Catalog,
10783    table_name: &str,
10784    target_pos: usize,
10785    incoming: &[Value],
10786    assignments: &[(String, Expr)],
10787    where_: Option<&Expr>,
10788) -> Result<Option<Vec<Value>>, EngineError> {
10789    let table = catalog.get(table_name).ok_or_else(|| {
10790        EngineError::Storage(StorageError::TableNotFound {
10791            name: table_name.into(),
10792        })
10793    })?;
10794    let schema_cols = table.schema().columns.clone();
10795    let existing = table
10796        .rows()
10797        .get(target_pos)
10798        .ok_or_else(|| {
10799            EngineError::Unsupported(alloc::format!(
10800                "ON CONFLICT DO UPDATE: row position {target_pos} out of bounds on {table_name:?}"
10801            ))
10802        })?
10803        .clone();
10804    let ctx = eval::EvalContext::new(&schema_cols, Some(table_name));
10805    // Optional WHERE filter on the conflict row.
10806    if let Some(w) = where_ {
10807        let pred = w.clone();
10808        let pred = substitute_excluded_refs(pred, &schema_cols, incoming);
10809        let v = eval::eval_expr(&pred, &existing, &ctx)?;
10810        if !matches!(v, Value::Bool(true)) {
10811            return Ok(None);
10812        }
10813    }
10814    let mut new_values = existing.values.clone();
10815    for (col_name, expr) in assignments {
10816        let target_idx = schema_cols
10817            .iter()
10818            .position(|c| c.name == *col_name)
10819            .ok_or_else(|| {
10820                EngineError::Eval(EvalError::ColumnNotFound {
10821                    name: col_name.clone(),
10822                })
10823            })?;
10824        let sub = substitute_excluded_refs(expr.clone(), &schema_cols, incoming);
10825        let v = eval::eval_expr(&sub, &existing, &ctx)?;
10826        new_values[target_idx] = coerce_value(v, schema_cols[target_idx].ty, col_name, target_idx)?;
10827    }
10828    Ok(Some(new_values))
10829}
10830
10831/// v7.9.9 — walk an `Expr` tree replacing any `Column { qualifier:
10832/// "EXCLUDED", name }` reference with a `Literal` of the matching
10833/// value from the incoming-row vec. Resolution against the
10834/// child-table column list (by name).
10835fn substitute_excluded_refs(expr: Expr, schema_cols: &[ColumnSchema], incoming: &[Value]) -> Expr {
10836    use spg_sql::ast::ColumnName;
10837    match expr {
10838        Expr::Column(ColumnName { qualifier, name })
10839            if qualifier
10840                .as_deref()
10841                .is_some_and(|q| q.eq_ignore_ascii_case("excluded")) =>
10842        {
10843            let pos = schema_cols.iter().position(|c| c.name == name);
10844            match pos {
10845                Some(p) => {
10846                    let v = incoming.get(p).cloned().unwrap_or(Value::Null);
10847                    value_to_literal_expr(v)
10848                        .unwrap_or_else(|_| Expr::Literal(spg_sql::ast::Literal::Null))
10849                }
10850                None => Expr::Column(ColumnName { qualifier, name }),
10851            }
10852        }
10853        Expr::Binary { op, lhs, rhs } => Expr::Binary {
10854            op,
10855            lhs: Box::new(substitute_excluded_refs(*lhs, schema_cols, incoming)),
10856            rhs: Box::new(substitute_excluded_refs(*rhs, schema_cols, incoming)),
10857        },
10858        Expr::Unary { op, expr } => Expr::Unary {
10859            op,
10860            expr: Box::new(substitute_excluded_refs(*expr, schema_cols, incoming)),
10861        },
10862        Expr::FunctionCall { name, args } => Expr::FunctionCall {
10863            name,
10864            args: args
10865                .into_iter()
10866                .map(|a| substitute_excluded_refs(a, schema_cols, incoming))
10867                .collect(),
10868        },
10869        other => other,
10870    }
10871}
10872
10873/// v7.6.2 / v7.6.7 — INSERT-side FK enforcement. For every row
10874/// about to be inserted into `child_table`, every FK declared on
10875/// that table is checked: the row's FK columns must either be
10876/// NULL (SQL spec skip) or match an existing parent row via the
10877/// parent's BTree PK / UNIQUE index.
10878///
10879/// Returns `EngineError::Unsupported` with a `FOREIGN KEY violation`
10880/// payload on first failure.
10881///
10882/// **Self-referencing FKs (v7.6.7 widening):** when `fk.parent_table
10883/// == child_table`, the parent rows visible to this check are
10884///  (a) rows already committed to the table, plus
10885///  (b) earlier rows from the *same* `rows` batch.
10886/// This makes `INSERT INTO tree VALUES (1, NULL), (2, 1), (3, 2)`
10887/// work in a single statement — common pattern for bulk-loading
10888/// hierarchies.
10889/// v7.9.19 — enforce table-level UNIQUE / PRIMARY KEY tuple
10890/// constraints at INSERT time. For each constraint declared on
10891/// the target table, check that no existing row + no earlier row
10892/// in the same batch has the same full-column tuple. NULL in
10893/// any column lifts the row out of the check (SQL spec: NULL
10894/// ≠ NULL for uniqueness). mailrs G1 + G6.
10895fn enforce_uniqueness_inserts(
10896    catalog: &Catalog,
10897    child_table: &str,
10898    constraints: &[spg_storage::UniquenessConstraint],
10899    rows: &[Vec<Value>],
10900) -> Result<(), EngineError> {
10901    if constraints.is_empty() {
10902        return Ok(());
10903    }
10904    let table = catalog.get(child_table).ok_or_else(|| {
10905        EngineError::Storage(StorageError::TableNotFound {
10906            name: child_table.into(),
10907        })
10908    })?;
10909    for uc in constraints {
10910        for (batch_idx, row_values) in rows.iter().enumerate() {
10911            let key: Vec<&Value> = uc.columns.iter().map(|&i| &row_values[i]).collect();
10912            let has_null = key.iter().any(|v| matches!(v, Value::Null));
10913            // v7.13.0 — `NULLS NOT DISTINCT` (mailrs round-5 G10,
10914            // PG 15+): two rows whose constrained columns are all
10915            // NULL collide. SQL-standard `NULLS DISTINCT` lets any
10916            // NULL skip the check.
10917            if has_null && !uc.nulls_not_distinct {
10918                continue;
10919            }
10920            // Table-side collision: scan existing rows.
10921            let collides_in_table = table.rows().iter().any(|prow| {
10922                uc.columns
10923                    .iter()
10924                    .enumerate()
10925                    .all(|(i, &p)| prow.values.get(p) == Some(key[i]))
10926            });
10927            // Batch-side collision: earlier rows in the same INSERT.
10928            let collides_in_batch = rows[..batch_idx].iter().any(|earlier| {
10929                uc.columns
10930                    .iter()
10931                    .enumerate()
10932                    .all(|(i, &p)| earlier.get(p) == Some(key[i]))
10933            });
10934            if collides_in_table || collides_in_batch {
10935                let kind = if uc.is_primary_key {
10936                    "PRIMARY KEY"
10937                } else {
10938                    "UNIQUE"
10939                };
10940                let col_names: Vec<String> = uc
10941                    .columns
10942                    .iter()
10943                    .map(|&i| table.schema().columns[i].name.clone())
10944                    .collect();
10945                return Err(EngineError::Unsupported(alloc::format!(
10946                    "{kind} violation on {child_table:?} columns {col_names:?}: \
10947                     row #{batch_idx} duplicates an existing key"
10948                )));
10949            }
10950        }
10951    }
10952    Ok(())
10953}
10954
10955/// v7.9.29 — `true` iff `v` counts as a truthy SQL value for a
10956/// WHERE-style predicate. NULL → false (three-valued logic
10957/// collapses to "skip this row" for index inclusion). Numeric
10958/// non-zero, BIGINT non-zero, TINYINT non-zero, BOOLEAN true → true.
10959/// Everything else (strings, vectors, JSON, …) is not a valid
10960/// predicate result and surfaces as `false` so a malformed
10961/// predicate degrades to "row not in index" rather than panicking.
10962fn predicate_truthy(v: &spg_storage::Value) -> bool {
10963    use spg_storage::Value as V;
10964    match v {
10965        V::Bool(b) => *b,
10966        V::Int(n) => *n != 0,
10967        V::BigInt(n) => *n != 0,
10968        V::SmallInt(n) => *n != 0,
10969        _ => false,
10970    }
10971}
10972
10973/// v7.9.29 — at CREATE UNIQUE INDEX time, scan the table's
10974/// committed rows for pre-existing duplicates. If any pair of rows
10975/// matches the predicate AND has the same index key, refuse to
10976/// create the index so the user fixes the data before retrying.
10977fn check_existing_unique_violation(
10978    idx: &spg_storage::Index,
10979    schema: &spg_storage::TableSchema,
10980    rows: &[spg_storage::Row],
10981) -> Result<(), EngineError> {
10982    let predicate_expr = match idx.partial_predicate.as_deref() {
10983        Some(s) => Some(spg_sql::parser::parse_expression(s).map_err(|e| {
10984            EngineError::Unsupported(alloc::format!(
10985                "stored partial predicate {s:?} failed to re-parse: {e:?}"
10986            ))
10987        })?),
10988        None => None,
10989    };
10990    let ctx = eval::EvalContext::new(&schema.columns, None);
10991    let key_positions = unique_key_positions(idx);
10992    let mut seen: alloc::vec::Vec<alloc::vec::Vec<spg_storage::Value>> = alloc::vec::Vec::new();
10993    for row in rows {
10994        if let Some(expr) = &predicate_expr {
10995            let v = eval::eval_expr(expr, row, &ctx).map_err(|e| {
10996                EngineError::Unsupported(alloc::format!(
10997                    "evaluating UNIQUE INDEX predicate against existing row: {e:?}"
10998                ))
10999            })?;
11000            if !predicate_truthy(&v) {
11001                continue;
11002            }
11003        }
11004        let key: alloc::vec::Vec<spg_storage::Value> = key_positions
11005            .iter()
11006            .map(|&p| {
11007                row.values
11008                    .get(p)
11009                    .cloned()
11010                    .unwrap_or(spg_storage::Value::Null)
11011            })
11012            .collect();
11013        if key.iter().any(|v| matches!(v, spg_storage::Value::Null)) {
11014            continue;
11015        }
11016        if seen.iter().any(|other| *other == key) {
11017            return Err(EngineError::Unsupported(alloc::format!(
11018                "CREATE UNIQUE INDEX {:?}: existing rows already violate the constraint",
11019                idx.name
11020            )));
11021        }
11022        seen.push(key);
11023    }
11024    Ok(())
11025}
11026
11027/// v7.9.29 — full key tuple for a UNIQUE INDEX (leading +
11028/// extra positions). For single-column indexes this is just
11029/// `[column_position]`.
11030fn unique_key_positions(idx: &spg_storage::Index) -> alloc::vec::Vec<usize> {
11031    let mut out = alloc::vec::Vec::with_capacity(1 + idx.extra_column_positions.len());
11032    out.push(idx.column_position);
11033    out.extend_from_slice(&idx.extra_column_positions);
11034    out
11035}
11036
11037/// v7.9.29 — at INSERT time, walk every `is_unique` index on the
11038/// target table. For each, eval the index's optional predicate
11039/// against (a) the candidate row and (b) every committed row plus
11040/// earlier batch rows; only rows where the predicate is truthy
11041/// participate. A duplicate key among predicate-matching rows is a
11042/// uniqueness violation. NULL keys lift the row out of the check
11043/// (matching PG's "UNIQUE allows multiple NULLs" semantics).
11044fn enforce_unique_index_inserts(
11045    catalog: &Catalog,
11046    table_name: &str,
11047    rows: &[alloc::vec::Vec<spg_storage::Value>],
11048) -> Result<(), EngineError> {
11049    let table = catalog.get(table_name).ok_or_else(|| {
11050        EngineError::Storage(StorageError::TableNotFound {
11051            name: table_name.into(),
11052        })
11053    })?;
11054    let schema = table.schema();
11055    let ctx = eval::EvalContext::new(&schema.columns, None);
11056    for idx in table.indices() {
11057        if !idx.is_unique {
11058            continue;
11059        }
11060        // Re-parse the predicate once per index per batch.
11061        let predicate_expr = match idx.partial_predicate.as_deref() {
11062            Some(s) => Some(spg_sql::parser::parse_expression(s).map_err(|e| {
11063                EngineError::Unsupported(alloc::format!(
11064                    "UNIQUE INDEX {:?} predicate {s:?} failed to re-parse: {e:?}",
11065                    idx.name
11066                ))
11067            })?),
11068            None => None,
11069        };
11070        let key_positions = unique_key_positions(idx);
11071        let key_of = |values: &[spg_storage::Value]| -> alloc::vec::Vec<spg_storage::Value> {
11072            key_positions
11073                .iter()
11074                .map(|&p| values.get(p).cloned().unwrap_or(spg_storage::Value::Null))
11075                .collect()
11076        };
11077        // Helper: does `values` participate in this index? (predicate
11078        // truthy when present.) Wraps `values` into a transient Row
11079        // because eval_expr requires &Row.
11080        let participates = |values: &[spg_storage::Value]| -> Result<bool, EngineError> {
11081            let Some(expr) = &predicate_expr else {
11082                return Ok(true);
11083            };
11084            let tmp_row = spg_storage::Row {
11085                values: values.to_vec(),
11086            };
11087            let v = eval::eval_expr(expr, &tmp_row, &ctx).map_err(|e| {
11088                EngineError::Unsupported(alloc::format!(
11089                    "UNIQUE INDEX {:?} predicate eval: {e:?}",
11090                    idx.name
11091                ))
11092            })?;
11093            Ok(predicate_truthy(&v))
11094        };
11095        for (batch_idx, row_values) in rows.iter().enumerate() {
11096            if !participates(row_values)? {
11097                continue;
11098            }
11099            let key = key_of(row_values);
11100            if key.iter().any(|v| matches!(v, spg_storage::Value::Null)) {
11101                continue;
11102            }
11103            // Committed-table collision.
11104            for prow in table.rows() {
11105                if !participates(&prow.values)? {
11106                    continue;
11107                }
11108                if key_of(&prow.values) == key {
11109                    return Err(EngineError::Unsupported(alloc::format!(
11110                        "UNIQUE INDEX {:?} violation on {table_name:?}: \
11111                         row #{batch_idx} duplicates an existing key",
11112                        idx.name
11113                    )));
11114                }
11115            }
11116            // Within-batch collision: earlier rows in the same INSERT.
11117            for earlier in &rows[..batch_idx] {
11118                if !participates(earlier)? {
11119                    continue;
11120                }
11121                if key_of(earlier) == key {
11122                    return Err(EngineError::Unsupported(alloc::format!(
11123                        "UNIQUE INDEX {:?} violation on {table_name:?}: \
11124                         row #{batch_idx} duplicates an earlier row in the same batch",
11125                        idx.name
11126                    )));
11127                }
11128            }
11129        }
11130    }
11131    Ok(())
11132}
11133
11134/// v7.13.0 — `UPDATE OF cols` filter helper (mailrs round-5 G7).
11135/// Returns `true` when at least one of `filter_cols` has a
11136/// different value in `new_row` vs `old_row`. Column lookup is
11137/// case-insensitive against `schema_cols`; unknown filter columns
11138/// are treated as "not changed" (the trigger therefore won't
11139/// fire on them — surfacing a parse-time error would be too
11140/// strict for catalog reloads where the schema may have drifted).
11141fn any_column_changed(
11142    filter_cols: &[String],
11143    schema_cols: &[ColumnSchema],
11144    old_row: &Row,
11145    new_row: &Row,
11146) -> bool {
11147    for col_name in filter_cols {
11148        let Some(pos) = schema_cols
11149            .iter()
11150            .position(|c| c.name.eq_ignore_ascii_case(col_name))
11151        else {
11152            continue;
11153        };
11154        let old_v = old_row.values.get(pos);
11155        let new_v = new_row.values.get(pos);
11156        if old_v != new_v {
11157            return true;
11158        }
11159    }
11160    false
11161}
11162
11163/// v7.13.0 — evaluate every CHECK predicate on the schema against
11164/// each candidate row. Mirrors PG semantics: a `false` result
11165/// rejects the mutation; a NULL result *passes* (CHECK rejects
11166/// only on definite-false, not on unknown). mailrs round-5 G3.
11167fn enforce_check_constraints(
11168    catalog: &Catalog,
11169    table_name: &str,
11170    rows: &[alloc::vec::Vec<spg_storage::Value>],
11171) -> Result<(), EngineError> {
11172    let table = catalog.get(table_name).ok_or_else(|| {
11173        EngineError::Storage(StorageError::TableNotFound {
11174            name: table_name.into(),
11175        })
11176    })?;
11177    let schema = table.schema();
11178    if schema.checks.is_empty() {
11179        return Ok(());
11180    }
11181    let ctx = eval::EvalContext::new(&schema.columns, None);
11182    let mut parsed: alloc::vec::Vec<(usize, Expr)> = alloc::vec::Vec::new();
11183    for (i, src) in schema.checks.iter().enumerate() {
11184        let expr = spg_sql::parser::parse_expression(src).map_err(|e| {
11185            EngineError::Unsupported(alloc::format!(
11186                "CHECK constraint #{i} on {table_name:?} ({src:?}) failed to re-parse: {e:?}"
11187            ))
11188        })?;
11189        parsed.push((i, expr));
11190    }
11191    for (batch_idx, row_values) in rows.iter().enumerate() {
11192        let tmp_row = spg_storage::Row {
11193            values: row_values.clone(),
11194        };
11195        for (i, expr) in &parsed {
11196            let v = eval::eval_expr(expr, &tmp_row, &ctx).map_err(|e| {
11197                EngineError::Unsupported(alloc::format!(
11198                    "CHECK constraint #{i} on {table_name:?} eval at row #{batch_idx}: {e:?}"
11199                ))
11200            })?;
11201            // PG: NULL passes (CHECK rejects on definite-false only).
11202            if matches!(v, spg_storage::Value::Bool(false)) {
11203                return Err(EngineError::Unsupported(alloc::format!(
11204                    "CHECK constraint violation on {table_name:?} (row #{batch_idx}): {:?}",
11205                    schema.checks[*i]
11206                )));
11207            }
11208        }
11209    }
11210    Ok(())
11211}
11212
11213fn enforce_fk_inserts(
11214    catalog: &Catalog,
11215    child_table: &str,
11216    fks: &[spg_storage::ForeignKeyConstraint],
11217    rows: &[Vec<Value>],
11218) -> Result<(), EngineError> {
11219    for fk in fks {
11220        let parent_is_self = fk.parent_table == child_table;
11221        let parent = if parent_is_self {
11222            // Self-ref: read the current state of the same table.
11223            // The mut borrow on child has been dropped by the caller.
11224            catalog.get(child_table).ok_or_else(|| {
11225                EngineError::Storage(StorageError::TableNotFound {
11226                    name: child_table.into(),
11227                })
11228            })?
11229        } else {
11230            catalog.get(&fk.parent_table).ok_or_else(|| {
11231                EngineError::Storage(StorageError::TableNotFound {
11232                    name: fk.parent_table.clone(),
11233                })
11234            })?
11235        };
11236        for (batch_idx, row_values) in rows.iter().enumerate() {
11237            // Single-column FK fast path: try the parent's BTree
11238            // index for an O(log n) lookup. Composite FKs fall back
11239            // to a parent-row scan.
11240            if fk.local_columns.len() == 1 {
11241                let v = &row_values[fk.local_columns[0]];
11242                if matches!(v, Value::Null) {
11243                    continue;
11244                }
11245                let parent_col = fk.parent_columns[0];
11246                let key = spg_storage::IndexKey::from_value(v).ok_or_else(|| {
11247                    EngineError::Unsupported(alloc::format!(
11248                        "FOREIGN KEY column value of type {:?} is not index-eligible",
11249                        v.data_type()
11250                    ))
11251                })?;
11252                let present_committed = parent.indices().iter().any(|idx| {
11253                    matches!(idx.kind, spg_storage::IndexKind::BTree(_))
11254                        && idx.column_position == parent_col
11255                        && idx.partial_predicate.is_none()
11256                        && !idx.lookup_eq(&key).is_empty()
11257                });
11258                // v7.6.7 self-ref widening: also accept a match
11259                // against earlier rows in this same batch when the
11260                // FK points at the table being inserted into.
11261                let present_in_batch = parent_is_self
11262                    && rows[..batch_idx]
11263                        .iter()
11264                        .any(|earlier| earlier.get(parent_col) == Some(v));
11265                if !(present_committed || present_in_batch) {
11266                    return Err(EngineError::Unsupported(alloc::format!(
11267                        "FOREIGN KEY violation: no parent row in {:?} where {} = {:?}",
11268                        fk.parent_table,
11269                        parent
11270                            .schema()
11271                            .columns
11272                            .get(parent_col)
11273                            .map_or("?", |c| c.name.as_str()),
11274                        v,
11275                    )));
11276                }
11277            } else {
11278                // Composite FK: scan parent rows. v7.6.7 also
11279                // accepts a match against earlier rows in the same
11280                // batch (self-ref bulk-loading of hierarchies).
11281                if fk
11282                    .local_columns
11283                    .iter()
11284                    .all(|&i| matches!(row_values.get(i), Some(Value::Null)))
11285                {
11286                    continue;
11287                }
11288                let local: Vec<&Value> = fk.local_columns.iter().map(|&i| &row_values[i]).collect();
11289                let parent_match_committed = parent.rows().iter().any(|prow| {
11290                    fk.parent_columns
11291                        .iter()
11292                        .enumerate()
11293                        .all(|(i, &pi)| prow.values.get(pi) == Some(local[i]))
11294                });
11295                let parent_match_in_batch = parent_is_self
11296                    && rows[..batch_idx].iter().any(|earlier| {
11297                        fk.parent_columns
11298                            .iter()
11299                            .enumerate()
11300                            .all(|(i, &pi)| earlier.get(pi) == Some(local[i]))
11301                    });
11302                if !(parent_match_committed || parent_match_in_batch) {
11303                    return Err(EngineError::Unsupported(alloc::format!(
11304                        "FOREIGN KEY violation: no parent row in {:?} matching composite key",
11305                        fk.parent_table,
11306                    )));
11307                }
11308            }
11309        }
11310    }
11311    Ok(())
11312}
11313
/// v7.6.4 / v7.6.5 — one step of the FK action plan computed for a
/// DELETE on a parent. The plan is a list of these steps, stacked
/// across the FK graph by `plan_fk_parent_deletions`.
///
/// `plan_fk_parent_updates` returns the same step type for UPDATEs
/// that change referenced parent columns. Steps are executed by
/// `apply_fk_child_step`.
#[derive(Debug, Clone)]
struct FkChildStep {
    /// Name of the table whose rows this step mutates; looked up in
    /// the catalog when the step is applied.
    child_table: String,
    /// What to do to the selected rows / cells of `child_table`.
    action: FkChildAction,
}
11322
/// The mutation carried by one `FkChildStep`.
///
/// In the cell-oriented variants `positions` and `columns` (and
/// `defaults`, where present) are parallel vectors: entry `i`
/// addresses the cell at row `positions[i]`, column `columns[i]`.
#[derive(Debug, Clone)]
enum FkChildAction {
    /// CASCADE — remove these rows. Sorted, deduplicated positions.
    Delete { positions: Vec<usize> },
    /// SET NULL — for each (row, column) in the flat list, write
    /// NULL into that child cell. Multiple FKs on the same row may
    /// produce overlapping entries (deduped at plan time).
    SetNull {
        positions: Vec<usize>,
        columns: Vec<usize>,
    },
    /// SET DEFAULT — same shape as SetNull but writes the column's
    /// declared DEFAULT value (resolved at plan time). Columns
    /// without a DEFAULT raise an error during planning.
    ///
    /// `plan_fk_parent_updates` also reuses this variant for
    /// ON UPDATE CASCADE, in which case `defaults[i]` holds the new
    /// parent key value rather than a declared DEFAULT.
    SetDefault {
        positions: Vec<usize>,
        columns: Vec<usize>,
        defaults: Vec<Value>,
    },
}
11343
/// v7.6.3 → v7.6.5 — plan FK fallout for a DELETE on a parent table.
///
/// Walks every table in the catalog looking for FKs whose
/// `parent_table` is `parent_table_name`. For each such FK + each
/// to-be-deleted parent row:
///
///   - RESTRICT / NoAction → error, no plan returned
///   - CASCADE → child rows get scheduled for deletion; recursive
///   - SetNull → child FK column(s) scheduled to be NULL-ed.
///     Verified NULL-able at plan time.
///   - SetDefault → child FK column(s) scheduled to be reset to
///     their declared DEFAULT. Columns without a DEFAULT raise.
///
/// SET NULL / SET DEFAULT do NOT cascade further — the child row
/// stays; only one of its columns mutates.
///
/// `to_delete_positions` are the row indices being deleted in
/// `parent_table_name`; `to_delete_rows` are the values of the rows
/// being deleted. The function only reads `catalog`; the returned
/// steps are applied separately.
///
/// The returned list holds all SetNull steps first, then all
/// SetDefault steps, then all Delete steps; within each group the
/// steps are ordered by child table name.
///
/// # Errors
/// `EngineError::Unsupported` for a RESTRICT / NO ACTION hit, a
/// SET NULL on a NOT NULL column, a SET DEFAULT on a column with no
/// DEFAULT, or an FK local column index missing from the child
/// schema.
///
/// # Panics
/// Indexes each parent row by the FK's parent column positions, and
/// expects every name from `catalog.table_names()` to resolve via
/// `catalog.get`.
fn plan_fk_parent_deletions(
    catalog: &Catalog,
    parent_table_name: &str,
    to_delete_positions: &[usize],
    to_delete_rows: &[Vec<Value>],
) -> Result<Vec<FkChildStep>, EngineError> {
    use alloc::collections::{BTreeMap, BTreeSet};
    if to_delete_rows.is_empty() {
        return Ok(Vec::new());
    }
    // child_table → row indices scheduled for cascade deletion.
    let mut delete_plan: BTreeMap<String, BTreeSet<usize>> = BTreeMap::new();
    // setnull: child_table → set of (row_idx, col_idx) cells to NULL.
    // setdefault: child_table → (row_idx, col_idx) → resolved DEFAULT value.
    let mut setnull_plan: BTreeMap<String, BTreeSet<(usize, usize)>> = BTreeMap::new();
    let mut setdefault_plan: BTreeMap<String, BTreeMap<(usize, usize), Value>> = BTreeMap::new();
    // (table, row_idx) pairs already scheduled for deletion. Seeded
    // with the rows the DELETE itself removes, then grown by CASCADE.
    let mut visited: BTreeSet<(String, usize)> = BTreeSet::new();
    for &p in to_delete_positions {
        visited.insert((parent_table_name.to_string(), p));
    }
    // Worklist (used as a stack) of (table name, row values) for rows
    // that are going away and whose inbound FKs still need examining.
    let mut work: Vec<(String, Vec<Value>)> = to_delete_rows
        .iter()
        .map(|r| (parent_table_name.to_string(), r.clone()))
        .collect();
    while let Some((cur_parent, parent_row)) = work.pop() {
        for child_name in catalog.table_names() {
            let child = catalog
                .get(&child_name)
                .expect("table_names → catalog.get round-trip is total");
            for fk in &child.schema().foreign_keys {
                if fk.parent_table != cur_parent {
                    continue;
                }
                // Values of the referenced columns in the doomed parent row.
                let parent_key: Vec<&Value> = fk
                    .parent_columns
                    .iter()
                    .map(|&pi| &parent_row[pi])
                    .collect();
                // A parent key containing NULL is treated as having no
                // referencing children.
                if parent_key.iter().any(|v| matches!(v, Value::Null)) {
                    continue;
                }
                for (child_row_idx, child_row) in child.rows().iter().enumerate() {
                    // Self-referencing FK: a row of the same table that is
                    // already scheduled for deletion is not examined again.
                    if child_name == cur_parent
                        && visited.contains(&(child_name.clone(), child_row_idx))
                    {
                        continue;
                    }
                    // Pairwise comparison of the child's local FK columns
                    // with the parent key.
                    let matches_key = fk
                        .local_columns
                        .iter()
                        .enumerate()
                        .all(|(i, &li)| child_row.values.get(li) == Some(parent_key[i]));
                    if !matches_key {
                        continue;
                    }
                    match fk.on_delete {
                        spg_storage::FkAction::Restrict | spg_storage::FkAction::NoAction => {
                            return Err(EngineError::Unsupported(alloc::format!(
                                "FOREIGN KEY violation: DELETE on {cur_parent:?} is \
                                 restricted by FK from {child_name:?}.{:?}",
                                fk.local_columns,
                            )));
                        }
                        spg_storage::FkAction::Cascade => {
                            // `insert` returns false for a row that is already
                            // scheduled, which keeps the walk from looping on
                            // cyclic FK graphs.
                            if visited.insert((child_name.clone(), child_row_idx)) {
                                delete_plan
                                    .entry(child_name.clone())
                                    .or_default()
                                    .insert(child_row_idx);
                                // The cascaded row is itself a parent candidate.
                                work.push((child_name.clone(), child_row.values.clone()));
                            }
                        }
                        spg_storage::FkAction::SetNull => {
                            // Verify every local FK column is NULL-able.
                            for &li in &fk.local_columns {
                                let col = child.schema().columns.get(li).ok_or_else(|| {
                                    EngineError::Unsupported(alloc::format!(
                                        "FK local column {li} missing in {child_name:?}"
                                    ))
                                })?;
                                if !col.nullable {
                                    return Err(EngineError::Unsupported(alloc::format!(
                                        "FOREIGN KEY ON DELETE SET NULL: column \
                                         {child_name:?}.{:?} is NOT NULL — cannot SET NULL",
                                        col.name,
                                    )));
                                }
                            }
                            // Only record cells once all columns passed the check.
                            let entry = setnull_plan.entry(child_name.clone()).or_default();
                            for &li in &fk.local_columns {
                                entry.insert((child_row_idx, li));
                            }
                        }
                        spg_storage::FkAction::SetDefault => {
                            // Resolve the DEFAULT for every local FK col.
                            let entry = setdefault_plan.entry(child_name.clone()).or_default();
                            for &li in &fk.local_columns {
                                let col = child.schema().columns.get(li).ok_or_else(|| {
                                    EngineError::Unsupported(alloc::format!(
                                        "FK local column {li} missing in {child_name:?}"
                                    ))
                                })?;
                                let default = col.default.clone().ok_or_else(|| {
                                    EngineError::Unsupported(alloc::format!(
                                        "FOREIGN KEY ON DELETE SET DEFAULT: column \
                                         {child_name:?}.{:?} has no DEFAULT declared",
                                        col.name,
                                    ))
                                })?;
                                entry.insert((child_row_idx, li), default);
                            }
                        }
                    }
                }
            }
        }
    }
    // Flatten the three plans into the ordered `FkChildStep` list.
    // Deletes are emitted last (after every null/default re-write
    // step) so a child row that's both re-written and then
    // cascade-deleted only ends up deleted — but in v7.6.5
    // SetNull/Cascade never overlap on the same row (a single FK
    // chooses exactly one action), so the order is mostly a
    // precaution.
    let mut steps: Vec<FkChildStep> = Vec::new();
    for (child_table, entries) in setnull_plan {
        // Split the (row, column) pairs into the two parallel vectors.
        let (positions, columns): (Vec<usize>, Vec<usize>) = entries.into_iter().unzip();
        steps.push(FkChildStep {
            child_table,
            action: FkChildAction::SetNull { positions, columns },
        });
    }
    for (child_table, entries) in setdefault_plan {
        let mut positions = Vec::with_capacity(entries.len());
        let mut columns = Vec::with_capacity(entries.len());
        let mut defaults = Vec::with_capacity(entries.len());
        for ((p, c), v) in entries {
            positions.push(p);
            columns.push(c);
            defaults.push(v);
        }
        steps.push(FkChildStep {
            child_table,
            action: FkChildAction::SetDefault {
                positions,
                columns,
                defaults,
            },
        });
    }
    for (child_table, positions) in delete_plan {
        steps.push(FkChildStep {
            child_table,
            action: FkChildAction::Delete {
                // BTreeSet iteration yields ascending, unique positions.
                positions: positions.into_iter().collect(),
            },
        });
    }
    Ok(steps)
}
11517
11518/// v7.6.6 — plan FK fallout for an UPDATE that mutates parent-side
11519/// PK/UNIQUE columns. Walks every other table whose FK references
11520/// `parent_table_name`; for each FK whose parent_columns overlap a
11521/// mutated column, decides the action by `fk.on_update`.
11522///
11523///   - RESTRICT / NoAction → error if any child references the OLD
11524///     value
11525///   - CASCADE → child FK columns get rewritten to the NEW parent
11526///     value (a SetNull-style update step with the new value)
11527///   - SetNull → child FK columns set to NULL
11528///   - SetDefault → child FK columns set to declared default
11529///
11530/// `plan_with_old` is `(row_position, old_values, new_values)` so
11531/// the planner can detect "did this row's parent key actually
11532/// change?" — only rows where at least one referenced parent
11533/// column moved trigger inbound work.
11534fn plan_fk_parent_updates(
11535    catalog: &Catalog,
11536    parent_table_name: &str,
11537    plan_with_old: &[(usize, Vec<Value>, Vec<Value>)],
11538) -> Result<Vec<FkChildStep>, EngineError> {
11539    use alloc::collections::BTreeMap;
11540    if plan_with_old.is_empty() {
11541        return Ok(Vec::new());
11542    }
11543    // For each child table we may touch, build per-child step
11544    // lists. UPDATE never deletes children — `delete_plan` stays
11545    // empty here but is kept structurally aligned with
11546    // `plan_fk_parent_deletions` for future use.
11547    let delete_plan: BTreeMap<String, alloc::collections::BTreeSet<usize>> = BTreeMap::new();
11548    let mut setnull_plan: BTreeMap<String, alloc::collections::BTreeSet<(usize, usize)>> =
11549        BTreeMap::new();
11550    let mut setdefault_plan: BTreeMap<String, BTreeMap<(usize, usize), Value>> = BTreeMap::new();
11551    // Cascade-update plan: child_table → row_idx → col_idx → new_value
11552    let mut cascade_plan: BTreeMap<String, BTreeMap<(usize, usize), Value>> = BTreeMap::new();
11553
11554    for child_name in catalog.table_names() {
11555        let child = catalog
11556            .get(&child_name)
11557            .expect("table_names → catalog.get total");
11558        for fk in &child.schema().foreign_keys {
11559            if fk.parent_table != parent_table_name {
11560                continue;
11561            }
11562            for (_pos, old_row, new_row) in plan_with_old {
11563                // Did any parent FK column change?
11564                let key_changed = fk
11565                    .parent_columns
11566                    .iter()
11567                    .any(|&pi| old_row.get(pi) != new_row.get(pi));
11568                if !key_changed {
11569                    continue;
11570                }
11571                // The OLD parent key — used to find referring children.
11572                let old_key: Vec<&Value> =
11573                    fk.parent_columns.iter().map(|&pi| &old_row[pi]).collect();
11574                if old_key.iter().any(|v| matches!(v, Value::Null)) {
11575                    // NULL parent has no children — skip.
11576                    continue;
11577                }
11578                let new_key: Vec<&Value> =
11579                    fk.parent_columns.iter().map(|&pi| &new_row[pi]).collect();
11580                for (child_row_idx, child_row) in child.rows().iter().enumerate() {
11581                    // Self-ref same-row updates: a row updating its
11582                    // own PK doesn't restrict itself.
11583                    if child_name == parent_table_name
11584                        && plan_with_old.iter().any(|(p, _, _)| *p == child_row_idx)
11585                    {
11586                        continue;
11587                    }
11588                    let matches_key = fk
11589                        .local_columns
11590                        .iter()
11591                        .enumerate()
11592                        .all(|(i, &li)| child_row.values.get(li) == Some(old_key[i]));
11593                    if !matches_key {
11594                        continue;
11595                    }
11596                    match fk.on_update {
11597                        spg_storage::FkAction::Restrict | spg_storage::FkAction::NoAction => {
11598                            return Err(EngineError::Unsupported(alloc::format!(
11599                                "FOREIGN KEY violation: UPDATE on {parent_table_name:?} PK is \
11600                                 restricted by FK from {child_name:?}.{:?}",
11601                                fk.local_columns,
11602                            )));
11603                        }
11604                        spg_storage::FkAction::Cascade => {
11605                            // Rewrite child FK columns to new key.
11606                            let entry = cascade_plan.entry(child_name.clone()).or_default();
11607                            for (i, &li) in fk.local_columns.iter().enumerate() {
11608                                entry.insert((child_row_idx, li), new_key[i].clone());
11609                            }
11610                        }
11611                        spg_storage::FkAction::SetNull => {
11612                            for &li in &fk.local_columns {
11613                                let col = child.schema().columns.get(li).ok_or_else(|| {
11614                                    EngineError::Unsupported(alloc::format!(
11615                                        "FK local column {li} missing in {child_name:?}"
11616                                    ))
11617                                })?;
11618                                if !col.nullable {
11619                                    return Err(EngineError::Unsupported(alloc::format!(
11620                                        "FOREIGN KEY ON UPDATE SET NULL: column \
11621                                         {child_name:?}.{:?} is NOT NULL",
11622                                        col.name,
11623                                    )));
11624                                }
11625                            }
11626                            let entry = setnull_plan.entry(child_name.clone()).or_default();
11627                            for &li in &fk.local_columns {
11628                                entry.insert((child_row_idx, li));
11629                            }
11630                        }
11631                        spg_storage::FkAction::SetDefault => {
11632                            let entry = setdefault_plan.entry(child_name.clone()).or_default();
11633                            for &li in &fk.local_columns {
11634                                let col = child.schema().columns.get(li).ok_or_else(|| {
11635                                    EngineError::Unsupported(alloc::format!(
11636                                        "FK local column {li} missing in {child_name:?}"
11637                                    ))
11638                                })?;
11639                                let default = col.default.clone().ok_or_else(|| {
11640                                    EngineError::Unsupported(alloc::format!(
11641                                        "FOREIGN KEY ON UPDATE SET DEFAULT: column \
11642                                         {child_name:?}.{:?} has no DEFAULT",
11643                                        col.name,
11644                                    ))
11645                                })?;
11646                                entry.insert((child_row_idx, li), default);
11647                            }
11648                        }
11649                    }
11650                }
11651            }
11652        }
11653    }
11654    // Flatten into FkChildStep list. UPDATE doesn't produce
11655    // DeleteSteps (CASCADE on UPDATE just rewrites FK values).
11656    let mut steps: Vec<FkChildStep> = Vec::new();
11657    for (child_table, entries) in cascade_plan {
11658        let mut positions = Vec::with_capacity(entries.len());
11659        let mut columns = Vec::with_capacity(entries.len());
11660        let mut defaults = Vec::with_capacity(entries.len());
11661        for ((p, c), v) in entries {
11662            positions.push(p);
11663            columns.push(c);
11664            defaults.push(v);
11665        }
11666        // We reuse `FkChildAction::SetDefault` for cascade-update:
11667        // both shapes are "write a known value into specific cells"
11668        // — `apply_per_cell_writes` doesn't care whether the value
11669        // came from a DEFAULT declaration or a new parent key.
11670        steps.push(FkChildStep {
11671            child_table,
11672            action: FkChildAction::SetDefault {
11673                positions,
11674                columns,
11675                defaults,
11676            },
11677        });
11678    }
11679    for (child_table, entries) in setnull_plan {
11680        let (positions, columns): (Vec<usize>, Vec<usize>) = entries.into_iter().unzip();
11681        steps.push(FkChildStep {
11682            child_table,
11683            action: FkChildAction::SetNull { positions, columns },
11684        });
11685    }
11686    for (child_table, entries) in setdefault_plan {
11687        let mut positions = Vec::with_capacity(entries.len());
11688        let mut columns = Vec::with_capacity(entries.len());
11689        let mut defaults = Vec::with_capacity(entries.len());
11690        for ((p, c), v) in entries {
11691            positions.push(p);
11692            columns.push(c);
11693            defaults.push(v);
11694        }
11695        steps.push(FkChildStep {
11696            child_table,
11697            action: FkChildAction::SetDefault {
11698                positions,
11699                columns,
11700                defaults,
11701            },
11702        });
11703    }
11704    let _ = delete_plan; // UPDATE never deletes children.
11705    Ok(steps)
11706}
11707
11708/// v7.6.5 — apply one FK child step to the catalog. Encapsulates
11709/// the three action variants so the DELETE executor stays a
11710/// simple loop over the planned steps.
11711fn apply_fk_child_step(catalog: &mut Catalog, step: &FkChildStep) -> Result<(), EngineError> {
11712    let child = catalog.get_mut(&step.child_table).ok_or_else(|| {
11713        EngineError::Storage(StorageError::TableNotFound {
11714            name: step.child_table.clone(),
11715        })
11716    })?;
11717    match &step.action {
11718        FkChildAction::Delete { positions } => {
11719            let _ = child.delete_rows(positions);
11720        }
11721        FkChildAction::SetNull { positions, columns } => {
11722            apply_per_cell_writes(child, positions, columns, |_| Value::Null)?;
11723        }
11724        FkChildAction::SetDefault {
11725            positions,
11726            columns,
11727            defaults,
11728        } => {
11729            apply_per_cell_writes(child, positions, columns, |i| defaults[i].clone())?;
11730        }
11731    }
11732    Ok(())
11733}
11734
11735/// v7.6.5 — write new values into selected child cells via
11736/// `Table::update_row` (the catalog's existing UPDATE entry).
11737/// Groups writes by row position so multi-column updates on the
11738/// same row only call `update_row` once. `value_for(i)` produces
11739/// the new value for the i-th (position, column) entry.
11740fn apply_per_cell_writes(
11741    child: &mut spg_storage::Table,
11742    positions: &[usize],
11743    columns: &[usize],
11744    mut value_for: impl FnMut(usize) -> Value,
11745) -> Result<(), EngineError> {
11746    use alloc::collections::BTreeMap;
11747    let mut by_row: BTreeMap<usize, Vec<(usize, Value)>> = BTreeMap::new();
11748    for i in 0..positions.len() {
11749        by_row
11750            .entry(positions[i])
11751            .or_default()
11752            .push((columns[i], value_for(i)));
11753    }
11754    for (pos, mutations) in by_row {
11755        let mut new_values = child.rows()[pos].values.clone();
11756        for (col, v) in mutations {
11757            if let Some(slot) = new_values.get_mut(col) {
11758                *slot = v;
11759            }
11760        }
11761        child
11762            .update_row(pos, new_values)
11763            .map_err(EngineError::Storage)?;
11764    }
11765    Ok(())
11766}
11767
11768fn fk_action_sql_to_storage(a: spg_sql::ast::FkAction) -> spg_storage::FkAction {
11769    match a {
11770        spg_sql::ast::FkAction::Restrict => spg_storage::FkAction::Restrict,
11771        spg_sql::ast::FkAction::Cascade => spg_storage::FkAction::Cascade,
11772        spg_sql::ast::FkAction::SetNull => spg_storage::FkAction::SetNull,
11773        spg_sql::ast::FkAction::SetDefault => spg_storage::FkAction::SetDefault,
11774        spg_sql::ast::FkAction::NoAction => spg_storage::FkAction::NoAction,
11775    }
11776}
11777
11778/// v7.9.21 — resolve a column's DEFAULT for INSERT-time
11779/// default-fill. Free fn (rather than `&self`) so callers
11780/// with an active `&mut Table` borrow can still use it.
11781/// Literal defaults take the cached path (`col.default`);
11782/// runtime defaults hit `clock_fn` at each call. mailrs G4.
11783fn resolve_column_default_free(
11784    col: &ColumnSchema,
11785    clock_fn: Option<ClockFn>,
11786) -> Result<Value, EngineError> {
11787    if let Some(rt) = &col.runtime_default {
11788        return eval_runtime_default_free(rt, col.ty, clock_fn);
11789    }
11790    Ok(col.default.clone().unwrap_or(Value::Null))
11791}
11792
11793fn eval_runtime_default_free(
11794    rt: &str,
11795    ty: DataType,
11796    clock_fn: Option<ClockFn>,
11797) -> Result<Value, EngineError> {
11798    let s = rt.trim().to_ascii_lowercase();
11799    let canonical = s.trim_end_matches("()");
11800    let now_us = match clock_fn {
11801        Some(f) => f(),
11802        None => 0,
11803    };
11804    let v = match canonical {
11805        "now" | "current_timestamp" | "localtimestamp" => Value::Timestamp(now_us),
11806        "current_date" => Value::Date((now_us / 86_400_000_000) as i32),
11807        "current_time" | "localtime" => Value::Timestamp(now_us),
11808        other => {
11809            return Err(EngineError::Unsupported(alloc::format!(
11810                "runtime DEFAULT expression {other:?} not supported \
11811                 (v7.9.21 whitelist: now() / current_timestamp / \
11812                 current_date / current_time / localtimestamp / \
11813                 localtime)"
11814            )));
11815        }
11816    };
11817    coerce_value(v, ty, "DEFAULT", 0)
11818}
11819
11820/// v7.9.21 — true when a DEFAULT expression needs INSERT-time
11821/// evaluation rather than being cacheable as a literal Value.
11822/// FunctionCall is the immediate case (`now()`,
11823/// `current_timestamp`). Literal expressions and simple sign-
11824/// flipped numerics still take the static-cache path.
11825fn is_runtime_default_expr(expr: &Expr) -> bool {
11826    match expr {
11827        Expr::FunctionCall { .. } => true,
11828        Expr::Unary { expr, .. } => is_runtime_default_expr(expr),
11829        _ => false,
11830    }
11831}
11832
11833fn column_def_to_schema(c: ColumnDef) -> Result<ColumnSchema, EngineError> {
11834    let ty = column_type_to_data_type(c.ty);
11835    let mut schema = ColumnSchema::new(c.name.clone(), ty, c.nullable);
11836    if let Some(default_expr) = c.default {
11837        // v7.9.21 — distinguish literal defaults (evaluated once
11838        // at CREATE TABLE) from expression defaults (deferred to
11839        // INSERT). Function calls (`now()`, `current_timestamp`
11840        // — see v7.9.20 keyword promotion) take the runtime path.
11841        // Literals continue to cache. mailrs G4.
11842        if is_runtime_default_expr(&default_expr) {
11843            let display = alloc::format!("{default_expr}");
11844            schema = schema.with_runtime_default(display);
11845        } else {
11846            let raw = literal_expr_to_value(default_expr)?;
11847            let coerced = coerce_value(raw, ty, &c.name, 0)?;
11848            schema = schema.with_default(coerced);
11849        }
11850    }
11851    if c.auto_increment {
11852        // AUTO_INCREMENT only makes sense on integer-shaped columns.
11853        if !matches!(ty, DataType::SmallInt | DataType::Int | DataType::BigInt) {
11854            return Err(EngineError::Unsupported(alloc::format!(
11855                "AUTO_INCREMENT requires an integer column type, got {ty:?}"
11856            )));
11857        }
11858        schema = schema.with_auto_increment();
11859    }
11860    Ok(schema)
11861}
11862
/// v7.10.4 — decode a BYTEA literal. Accepts:
///   * `\xDEADBEEF` (case-insensitive hex; whitespace stripped)
///   * `Hello\000world` (backslash escape form; `\\` for literal backslash)
///   * Anything else → raw UTF-8 bytes of the input (PG accepts this too).
///
/// Surrounding whitespace is trimmed before decoding. In the escape
/// form a `\NNN` triple is decoded only when all three digits are octal
/// (`0`-`7`) and the value fits in one byte (`\000`..=`\377`); any other
/// backslash sequence is copied through byte-for-byte.
///
/// # Errors
/// Only the hex form can fail: an odd number of hex digits, or a
/// character that is not a hex digit.
fn decode_bytea_literal(s: &str) -> Result<alloc::vec::Vec<u8>, &'static str> {
    let s = s.trim();
    if let Some(hex) = s.strip_prefix("\\x").or_else(|| s.strip_prefix("\\X")) {
        // Hex form. Each pair of hex digits → one byte.
        let cleaned: alloc::string::String = hex.chars().filter(|c| !c.is_whitespace()).collect();
        if cleaned.len() % 2 != 0 {
            return Err("odd-length hex literal");
        }
        let mut out = alloc::vec::Vec::with_capacity(cleaned.len() / 2);
        for pair in cleaned.as_bytes().chunks_exact(2) {
            out.push((hex_nibble(pair[0])? << 4) | hex_nibble(pair[1])?);
        }
        return Ok(out);
    }
    // Escape form or raw. Walk byte-by-byte; `\\` and `\NNN` octal
    // sequences decode; anything else is a literal byte.
    let mut rest = s.as_bytes();
    let mut out = alloc::vec::Vec::with_capacity(rest.len());
    while let Some((&b, tail)) = rest.split_first() {
        match (b, tail) {
            (b'\\', [b'\\', after @ ..]) => {
                out.push(b'\\');
                rest = after;
            }
            // Leading digit 0-3 keeps the value within a byte; 8 and 9
            // are not octal digits and must not be folded into a value.
            (b'\\', [hi @ b'0'..=b'3', mid @ b'0'..=b'7', lo @ b'0'..=b'7', after @ ..]) => {
                out.push(((hi - b'0') << 6) | ((mid - b'0') << 3) | (lo - b'0'));
                rest = after;
            }
            _ => {
                out.push(b);
                rest = tail;
            }
        }
    }
    Ok(out)
}

/// Map one ASCII hex digit (either case) to its value `0..=15`.
///
/// # Errors
/// Returns `"invalid hex digit"` for any other byte.
fn hex_nibble(b: u8) -> Result<u8, &'static str> {
    match b {
        b'0'..=b'9' => Ok(b - b'0'),
        b'a'..=b'f' => Ok(b - b'a' + 10),
        b'A'..=b'F' => Ok(b - b'A' + 10),
        _ => Err("invalid hex digit"),
    }
}
11926
// v7.10.11 — note on `decode_text_array_literal` (defined after
// `array_literal_widen`): it decodes a PG TEXT[] external array form
// (`{a,b,NULL}` with optional double-quoted elements). The
// engine strips the leading/trailing `{`/`}` and splits at commas.
// Quoted elements (`"hello, world"`) preserve embedded commas;
// `\\` and `\"` decode to literal backslash / quote. Plain
// unquoted `NULL` (case-insensitive) maps to `None`.
11933/// v7.11.13 — pick the array type for `ARRAY[lit, …]` from the
11934/// element values. Single-element-type rules:
11935///   - all NULL / all Text → TextArray
11936///   - all Int (or Int+NULL) → IntArray
11937///   - any BigInt without Text → BigIntArray (widening)
11938///   - any Text → TextArray (fallback; non-string elements
11939///     render as text)
11940fn array_literal_widen(items: alloc::vec::Vec<Value>) -> Value {
11941    let mut has_text = false;
11942    let mut has_bigint = false;
11943    let mut has_int = false;
11944    for v in &items {
11945        match v {
11946            Value::Null => {}
11947            Value::Text(_) | Value::Json(_) => has_text = true,
11948            Value::BigInt(_) => has_bigint = true,
11949            Value::Int(_) | Value::SmallInt(_) => has_int = true,
11950            _ => has_text = true,
11951        }
11952    }
11953    if has_text || (!has_bigint && !has_int) {
11954        let out: alloc::vec::Vec<Option<alloc::string::String>> = items
11955            .into_iter()
11956            .map(|v| match v {
11957                Value::Null => None,
11958                Value::Text(s) | Value::Json(s) => Some(s),
11959                other => Some(alloc::format!("{other:?}")),
11960            })
11961            .collect();
11962        return Value::TextArray(out);
11963    }
11964    if has_bigint {
11965        let out: alloc::vec::Vec<Option<i64>> = items
11966            .into_iter()
11967            .map(|v| match v {
11968                Value::Null => None,
11969                Value::Int(n) => Some(i64::from(n)),
11970                Value::SmallInt(n) => Some(i64::from(n)),
11971                Value::BigInt(n) => Some(n),
11972                _ => unreachable!("widen: unexpected non-integer in BigInt path"),
11973            })
11974            .collect();
11975        return Value::BigIntArray(out);
11976    }
11977    let out: alloc::vec::Vec<Option<i32>> = items
11978        .into_iter()
11979        .map(|v| match v {
11980            Value::Null => None,
11981            Value::Int(n) => Some(n),
11982            Value::SmallInt(n) => Some(i32::from(n)),
11983            _ => unreachable!("widen: unexpected non-i32-compatible in Int path"),
11984        })
11985        .collect();
11986    Value::IntArray(out)
11987}
11988
/// v7.10.11 — decode a PG TEXT[] external array form
/// (`{a,b,NULL}` with optional double-quoted elements).
///
/// The input is trimmed, must be wrapped in `{`/`}`, and is split at
/// commas. Quoted elements (`"hello, world"`) preserve embedded commas
/// and decode `\\` / `\"` to a literal backslash / quote. Unquoted
/// elements are trimmed; a plain unquoted `NULL` (case-insensitive)
/// maps to `None`. An all-whitespace body yields an empty array.
///
/// # Errors
/// Returns a static message when the braces are missing, a quoted
/// element is not closed, or something other than `,` follows a quoted
/// element.
fn decode_text_array_literal(
    s: &str,
) -> Result<alloc::vec::Vec<Option<alloc::string::String>>, &'static str> {
    let trimmed = s.trim();
    let inner = trimmed
        .strip_prefix('{')
        .and_then(|x| x.strip_suffix('}'))
        .ok_or("TEXT[] literal must be enclosed in '{...}'")?;
    let mut out: alloc::vec::Vec<Option<alloc::string::String>> = alloc::vec::Vec::new();
    if inner.trim().is_empty() {
        return Ok(out);
    }
    let bytes = inner.as_bytes();
    let mut i = 0;
    while i <= bytes.len() {
        // Skip leading whitespace.
        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
            i += 1;
        }
        // Quoted element.
        if i < bytes.len() && bytes[i] == b'"' {
            i += 1; // open quote
            // Collect raw bytes rather than `byte as char`: the latter
            // would turn every byte of a multi-byte UTF-8 sequence into
            // its own Latin-1 character and corrupt non-ASCII text.
            let mut buf: alloc::vec::Vec<u8> = alloc::vec::Vec::new();
            while i < bytes.len() && bytes[i] != b'"' {
                if bytes[i] == b'\\' && i + 1 < bytes.len() {
                    buf.push(bytes[i + 1]);
                    i += 2;
                } else {
                    buf.push(bytes[i]);
                    i += 1;
                }
            }
            if i >= bytes.len() {
                return Err("unterminated quoted element");
            }
            i += 1; // close quote
            // Only ASCII backslashes were dropped from valid UTF-8, so
            // this conversion is not expected to fail.
            let text = alloc::string::String::from_utf8(buf)
                .map_err(|_| "invalid UTF-8 in quoted TEXT[] element")?;
            out.push(Some(text));
        } else {
            // Unquoted element — read until next comma or end.
            let start = i;
            while i < bytes.len() && bytes[i] != b',' {
                i += 1;
            }
            let raw = inner[start..i].trim();
            if raw.eq_ignore_ascii_case("NULL") {
                out.push(None);
            } else {
                out.push(Some(alloc::string::ToString::to_string(raw)));
            }
        }
        // Skip whitespace, expect comma or end.
        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
            i += 1;
        }
        if i >= bytes.len() {
            break;
        }
        if bytes[i] != b',' {
            return Err("expected ',' between TEXT[] elements");
        }
        i += 1;
    }
    Ok(out)
}
12053
/// v7.10.11 — encode a TEXT[] back into the PG external array
/// form. NULL elements become the literal `NULL`. An element is
/// double-quoted (with `\\` / `\"` escapes) when it is empty, spells
/// `NULL` in any case, or contains a comma, quote, backslash, brace,
/// or any whitespace character.
///
/// Quoting on every kind of whitespace (not just space / tab) matters
/// for round-tripping: an unquoted element is trimmed when decoded, so
/// a leading or trailing newline would otherwise be lost.
fn encode_text_array(items: &[Option<alloc::string::String>]) -> alloc::string::String {
    let mut out = alloc::string::String::with_capacity(2 + items.len() * 8);
    out.push('{');
    for (i, item) in items.iter().enumerate() {
        if i > 0 {
            out.push(',');
        }
        match item {
            None => out.push_str("NULL"),
            Some(s) => {
                let needs_quote = s.is_empty()
                    || s.eq_ignore_ascii_case("NULL")
                    || s.chars()
                        .any(|c| matches!(c, ',' | '{' | '}' | '"' | '\\') || c.is_whitespace());
                if needs_quote {
                    out.push('"');
                    for c in s.chars() {
                        // Only the quote and the backslash need escaping.
                        if c == '"' || c == '\\' {
                            out.push('\\');
                        }
                        out.push(c);
                    }
                    out.push('"');
                } else {
                    out.push_str(s);
                }
            }
        }
    }
    out.push('}');
    out
}
12090
/// v7.10.4 — render BYTEA bytes in PG's hex output format: a `\x`
/// prefix followed by two lowercase hex digits per byte. Used by the
/// Text-side round-trip and the wire layer's text-mode encoder.
fn encode_bytea_hex(b: &[u8]) -> alloc::string::String {
    const LOWER_HEX: &[u8; 16] = b"0123456789abcdef";

    let mut encoded = alloc::string::String::with_capacity(2 + 2 * b.len());
    encoded.push_str("\\x");
    for &byte in b {
        // High nibble first, then low nibble.
        encoded.push(char::from(LOWER_HEX[usize::from(byte >> 4)]));
        encoded.push(char::from(LOWER_HEX[usize::from(byte & 0x0F)]));
    }
    encoded
}
12105
/// Lowercase hex digit for a nibble value `0..=15`; any larger input
/// yields `'?'`.
const fn hex_digit(n: u8) -> char {
    if n < 10 {
        (b'0' + n) as char
    } else if n < 16 {
        (b'a' + (n - 10)) as char
    } else {
        '?'
    }
}
12113
12114const fn column_type_to_data_type(t: ColumnTypeName) -> DataType {
12115    match t {
12116        ColumnTypeName::SmallInt => DataType::SmallInt,
12117        ColumnTypeName::Int => DataType::Int,
12118        ColumnTypeName::BigInt => DataType::BigInt,
12119        ColumnTypeName::Float => DataType::Float,
12120        ColumnTypeName::Text => DataType::Text,
12121        ColumnTypeName::Varchar(n) => DataType::Varchar(n),
12122        ColumnTypeName::Char(n) => DataType::Char(n),
12123        ColumnTypeName::Bool => DataType::Bool,
12124        ColumnTypeName::Vector { dim, encoding } => DataType::Vector {
12125            dim,
12126            encoding: match encoding {
12127                SqlVecEncoding::F32 => VecEncoding::F32,
12128                SqlVecEncoding::Sq8 => VecEncoding::Sq8,
12129                SqlVecEncoding::F16 => VecEncoding::F16,
12130            },
12131        },
12132        ColumnTypeName::Numeric(precision, scale) => DataType::Numeric { precision, scale },
12133        ColumnTypeName::Date => DataType::Date,
12134        ColumnTypeName::Timestamp => DataType::Timestamp,
12135        ColumnTypeName::Timestamptz => DataType::Timestamptz,
12136        ColumnTypeName::Json => DataType::Json,
12137        ColumnTypeName::Jsonb => DataType::Jsonb,
12138        ColumnTypeName::Bytes => DataType::Bytes,
12139        ColumnTypeName::TextArray => DataType::TextArray,
12140        ColumnTypeName::IntArray => DataType::IntArray,
12141        ColumnTypeName::BigIntArray => DataType::BigIntArray,
12142        ColumnTypeName::TsVector => DataType::TsVector,
12143        ColumnTypeName::TsQuery => DataType::TsQuery,
12144    }
12145}
12146
12147/// Convert an INSERT VALUES expression to a storage Value. Supports literal
12148/// expressions, unary-minus over numeric literals, and pgvector-style
12149/// `'[..]'::vector` cast (v1.2). Anything more complex returns `Unsupported`.
12150fn literal_expr_to_value(expr: Expr) -> Result<Value, EngineError> {
12151    match expr {
12152        Expr::Literal(l) => Ok(literal_to_value(l)),
12153        Expr::Cast { expr, target } => {
12154            let inner_value = literal_expr_to_value(*expr)?;
12155            crate::eval::cast_value(inner_value, target).map_err(EngineError::Eval)
12156        }
12157        Expr::Unary {
12158            op: UnOp::Neg,
12159            expr,
12160        } => match *expr {
12161            Expr::Literal(Literal::Integer(n)) => {
12162                // Fold to i32 if it fits, else BigInt. Parser emits Integer(i64)
12163                // — overflow on negate of i64::MIN is the one edge case.
12164                let neg = n.checked_neg().ok_or_else(|| {
12165                    EngineError::Unsupported("integer literal overflow on negation".into())
12166                })?;
12167                Ok(int_value_for(neg))
12168            }
12169            Expr::Literal(Literal::Float(x)) => Ok(Value::Float(-x)),
12170            other => Err(EngineError::Unsupported(alloc::format!(
12171                "unary minus over non-literal expression: {other:?}"
12172            ))),
12173        },
12174        // v7.10.10 — `ARRAY[lit, lit, …]` constructor accepted at
12175        // INSERT-time. Each element must reduce to a Value through
12176        // `literal_expr_to_value`; NULL elements become `None`.
12177        // v7.11.13 — deduce shape from element values: all Int →
12178        // IntArray; any BigInt → BigIntArray (widening); any Text
12179        // → TextArray. Cast targets (`ARRAY[]::INT[]`) flow through
12180        // the outer Cast arm before reaching here and re-coerce.
12181        Expr::Array(items) => {
12182            let mut materialised: alloc::vec::Vec<Value> =
12183                alloc::vec::Vec::with_capacity(items.len());
12184            for elem in items {
12185                materialised.push(literal_expr_to_value(elem)?);
12186            }
12187            Ok(array_literal_widen(materialised))
12188        }
12189        other => Err(EngineError::Unsupported(alloc::format!(
12190            "non-literal INSERT value expression: {other:?}"
12191        ))),
12192    }
12193}
12194
12195fn literal_to_value(l: Literal) -> Value {
12196    match l {
12197        Literal::Integer(n) => int_value_for(n),
12198        Literal::Float(x) => Value::Float(x),
12199        Literal::String(s) => Value::Text(s),
12200        Literal::Bool(b) => Value::Bool(b),
12201        Literal::Null => Value::Null,
12202        Literal::Vector(v) => Value::Vector(v),
12203        Literal::Interval { months, micros, .. } => Value::Interval { months, micros },
12204    }
12205}
12206
12207/// Pick `Int` (`i32`) when the literal fits, else `BigInt`. `INT` vs `BIGINT`
12208/// columns will still enforce the right tag downstream — this is just the
12209/// default we synthesise from an unannotated integer literal.
12210fn int_value_for(n: i64) -> Value {
12211    if let Ok(small) = i32::try_from(n) {
12212        Value::Int(small)
12213    } else {
12214        Value::BigInt(n)
12215    }
12216}
12217
12218/// Widen / narrow `v` to fit `expected`. Numerics permit safe widening
12219/// (`Int → BigInt`, `Int/BigInt → Float`) and best-effort narrowing
12220/// (`BigInt → Int` succeeds only when the value fits in `i32`). Everything
12221/// else returns `TypeMismatch` carrying the column name for caller diagnostics.
12222/// `NULL` is always permitted; the nullability check happens later in storage.
12223#[allow(clippy::too_many_lines)]
12224fn coerce_value(
12225    v: Value,
12226    expected: DataType,
12227    col_name: &str,
12228    position: usize,
12229) -> Result<Value, EngineError> {
12230    if v.is_null() {
12231        return Ok(Value::Null);
12232    }
12233    let actual = v.data_type().expect("non-null");
12234    if actual == expected {
12235        return Ok(v);
12236    }
12237    let coerced = match (v, expected) {
12238        (Value::Int(n), DataType::BigInt) => Some(Value::BigInt(i64::from(n))),
12239        (Value::Int(n), DataType::Float) => Some(Value::Float(f64::from(n))),
12240        (Value::Int(n), DataType::SmallInt) => i16::try_from(n).ok().map(Value::SmallInt),
12241        (Value::Int(n), DataType::Numeric { precision, scale }) => Some(numeric_from_integer(
12242            i128::from(n),
12243            precision,
12244            scale,
12245            col_name,
12246        )?),
12247        (Value::SmallInt(n), DataType::Int) => Some(Value::Int(i32::from(n))),
12248        (Value::SmallInt(n), DataType::BigInt) => Some(Value::BigInt(i64::from(n))),
12249        (Value::SmallInt(n), DataType::Float) => Some(Value::Float(f64::from(n))),
12250        (Value::SmallInt(n), DataType::Numeric { precision, scale }) => Some(numeric_from_integer(
12251            i128::from(n),
12252            precision,
12253            scale,
12254            col_name,
12255        )?),
12256        (Value::BigInt(n), DataType::Int) => i32::try_from(n).ok().map(Value::Int),
12257        (Value::BigInt(n), DataType::SmallInt) => i16::try_from(n).ok().map(Value::SmallInt),
12258        #[allow(clippy::cast_precision_loss)]
12259        (Value::BigInt(n), DataType::Float) => Some(Value::Float(n as f64)),
12260        (Value::BigInt(n), DataType::Numeric { precision, scale }) => Some(numeric_from_integer(
12261            i128::from(n),
12262            precision,
12263            scale,
12264            col_name,
12265        )?),
12266        (Value::Float(x), DataType::Numeric { precision, scale }) => {
12267            Some(numeric_from_float(x, precision, scale, col_name)?)
12268        }
12269        // Text → DATE / TIMESTAMP: parse canonical text forms.
12270        (Value::Text(s), DataType::Date) => {
12271            let d = eval::parse_date_literal(&s).ok_or_else(|| {
12272                EngineError::Eval(EvalError::TypeMismatch {
12273                    detail: alloc::format!("cannot parse {s:?} as DATE for column `{col_name}`"),
12274                })
12275            })?;
12276            Some(Value::Date(d))
12277        }
12278        // v7.14.0 — MySQL DEFAULT clauses quote integer / float
12279        // / boolean literals (`DEFAULT '0'`, `DEFAULT '1'`,
12280        // `DEFAULT '3.14'`, `DEFAULT 'true'`). Coerce the text
12281        // form to the column's numeric / bool type at DEFAULT-
12282        // installation time so the storage check sees a typed
12283        // value. Parse failures fall through to TypeMismatch.
12284        (Value::Text(s), DataType::SmallInt) => s.parse::<i16>().ok().map(Value::SmallInt),
12285        (Value::Text(s), DataType::Int) => s.parse::<i32>().ok().map(Value::Int),
12286        (Value::Text(s), DataType::BigInt) => s.parse::<i64>().ok().map(Value::BigInt),
12287        (Value::Text(s), DataType::Float) => s.parse::<f64>().ok().map(Value::Float),
12288        (Value::Text(s), DataType::Bool) => match s.to_ascii_lowercase().as_str() {
12289            "0" | "false" | "f" | "no" | "off" => Some(Value::Bool(false)),
12290            "1" | "true" | "t" | "yes" | "on" => Some(Value::Bool(true)),
12291            _ => None,
12292        },
12293        // v4.9: Text ↔ JSON coercion. No structural validation —
12294        // any text literal is accepted; the responsibility for
12295        // valid JSON lies with the producer.
12296        (Value::Text(s), DataType::Json | DataType::Jsonb) => Some(Value::Json(s)),
12297        (Value::Json(s), DataType::Text) => Some(Value::Text(s)),
12298        // v7.13.3 — mailrs round-7 S10. SPG's storage represents
12299        // both JSON and JSONB on-disk as `Value::Json(String)` —
12300        // they share the underlying text payload. The cast
12301        // `'<text>'::jsonb` produces a Value::Json that needs to
12302        // satisfy a DataType::Jsonb column. Identity coerce in
12303        // both directions so JSON ↔ JSONB assignments work at all
12304        // INSERT / ALTER COLUMN TYPE / DEFAULT contexts.
12305        (Value::Json(s), DataType::Jsonb | DataType::Json) => Some(Value::Json(s)),
12306        // v7.10.4 — Text → BYTEA. Decode PG-style literal forms:
12307        //   - Hex:    `\x48656c6c6f`  (case-insensitive hex pairs)
12308        //   - Escape: `Hello\\000world`  (backslash + octal triples)
12309        //   - Plain:  any string → raw UTF-8 bytes (PG also accepts)
12310        // Errors surface as TypeMismatch so the operator gets a
12311        // clear "this literal isn't a bytea literal" hint.
12312        (Value::Text(s), DataType::Bytes) => {
12313            let bytes = decode_bytea_literal(&s).map_err(|e| {
12314                EngineError::Eval(EvalError::TypeMismatch {
12315                    detail: alloc::format!(
12316                        "cannot parse {s:?} as BYTEA for column `{col_name}`: {e}"
12317                    ),
12318                })
12319            })?;
12320            Some(Value::Bytes(bytes))
12321        }
12322        // v7.10.4 — BYTEA → Text round-trip uses the PG hex
12323        // output (lowercase, `\x` prefix). Important when a
12324        // SELECT pulls a bytea cell through a Text column path.
12325        (Value::Bytes(b), DataType::Text) => Some(Value::Text(encode_bytea_hex(&b))),
12326        // v7.10.11 — Text → TEXT[]. Decode PG's external array
12327        // form `'{a,b,NULL}'`. NULL element token (case-insensitive)
12328        // is the literal `NULL`; everything else is a quoted or
12329        // unquoted text element. mailrs `'{label1,label2}'::TEXT[]`.
12330        (Value::Text(s), DataType::TextArray) => {
12331            let arr = decode_text_array_literal(&s).map_err(|e| {
12332                EngineError::Eval(EvalError::TypeMismatch {
12333                    detail: alloc::format!(
12334                        "cannot parse {s:?} as TEXT[] for column `{col_name}`: {e}"
12335                    ),
12336                })
12337            })?;
12338            Some(Value::TextArray(arr))
12339        }
12340        // v7.16.0 — Text → IntArray / BigIntArray for the
12341        // spg-sqlx Bind path. Decode the PG external form
12342        // `{1,2,3}` as a TEXT array first, then parse each
12343        // element as int. Same shape as the TextArray decode
12344        // above with an element-wise narrow.
12345        (Value::Text(s), DataType::IntArray) => {
12346            let arr = decode_text_array_literal(&s).map_err(|e| {
12347                EngineError::Eval(EvalError::TypeMismatch {
12348                    detail: alloc::format!(
12349                        "cannot parse {s:?} as INT[] for column `{col_name}`: {e}"
12350                    ),
12351                })
12352            })?;
12353            let mut out: Vec<Option<i32>> = Vec::with_capacity(arr.len());
12354            for elem in arr {
12355                match elem {
12356                    None => out.push(None),
12357                    Some(t) => {
12358                        let n: i32 = t.parse().map_err(|_| {
12359                            EngineError::Eval(EvalError::TypeMismatch {
12360                                detail: alloc::format!(
12361                                    "cannot parse {t:?} as INT element for `{col_name}`"
12362                                ),
12363                            })
12364                        })?;
12365                        out.push(Some(n));
12366                    }
12367                }
12368            }
12369            Some(Value::IntArray(out))
12370        }
12371        (Value::Text(s), DataType::BigIntArray) => {
12372            let arr = decode_text_array_literal(&s).map_err(|e| {
12373                EngineError::Eval(EvalError::TypeMismatch {
12374                    detail: alloc::format!(
12375                        "cannot parse {s:?} as BIGINT[] for column `{col_name}`: {e}"
12376                    ),
12377                })
12378            })?;
12379            let mut out: Vec<Option<i64>> = Vec::with_capacity(arr.len());
12380            for elem in arr {
12381                match elem {
12382                    None => out.push(None),
12383                    Some(t) => {
12384                        let n: i64 = t.parse().map_err(|_| {
12385                            EngineError::Eval(EvalError::TypeMismatch {
12386                                detail: alloc::format!(
12387                                    "cannot parse {t:?} as BIGINT element for `{col_name}`"
12388                                ),
12389                            })
12390                        })?;
12391                        out.push(Some(n));
12392                    }
12393                }
12394            }
12395            Some(Value::BigIntArray(out))
12396        }
12397        // v7.10.11 — TEXT[] → Text round-trip uses PG's
12398        // external array form (`{a,b,NULL}`). Lets a SELECT
12399        // pull an array column through any Text-side codepath.
12400        (Value::TextArray(items), DataType::Text) => Some(Value::Text(encode_text_array(&items))),
12401        // v7.16.1 — Text → TSVECTOR auto-coerce for the
12402        // INSERT-side wire path (mailrs round-9 A.2.a). PG
12403        // implicitly promotes the TEXT literal at INSERT into a
12404        // TSVECTOR column; SPG previously rejected with a hard
12405        // type mismatch, blocking 23,276 pg_dump rows into
12406        // `messages.search_vector`. We route through the same
12407        // `decode_tsvector_external` the `::tsvector` cast
12408        // already uses, so PG-canonical forms (`'word'`,
12409        // `'word:1A,2B'`, multi-lexeme, empty `''`) all parse.
12410        (Value::Text(s), DataType::TsVector) => {
12411            let lexs = eval::decode_tsvector_external(&s).map_err(|e| {
12412                EngineError::Eval(EvalError::TypeMismatch {
12413                    detail: alloc::format!(
12414                        "cannot parse {s:?} as TSVECTOR for column `{col_name}`: {e}"
12415                    ),
12416                })
12417            })?;
12418            Some(Value::TsVector(lexs))
12419        }
12420        (Value::Text(s), DataType::Timestamp | DataType::Timestamptz) => {
12421            let t = eval::parse_timestamp_literal(&s).ok_or_else(|| {
12422                EngineError::Eval(EvalError::TypeMismatch {
12423                    detail: alloc::format!(
12424                        "cannot parse {s:?} as TIMESTAMP for column `{col_name}`"
12425                    ),
12426                })
12427            })?;
12428            Some(Value::Timestamp(t))
12429        }
12430        // DATE ↔ TIMESTAMP convertibility (DATE → midnight,
12431        // TIMESTAMP → day truncation).
12432        (Value::Date(d), DataType::Timestamp | DataType::Timestamptz) => {
12433            Some(Value::Timestamp(i64::from(d) * 86_400_000_000))
12434        }
12435        // v7.9.21 — Value::Timestamp lands in either Timestamp
12436        // or Timestamptz columns; the on-disk layout is the
12437        // same i64 microseconds UTC.
12438        (Value::Timestamp(t), DataType::Timestamptz) => Some(Value::Timestamp(t)),
12439        (Value::Timestamp(t), DataType::Date) => {
12440            let days = t.div_euclid(86_400_000_000);
12441            i32::try_from(days).ok().map(Value::Date)
12442        }
12443        (
12444            Value::Numeric {
12445                scaled,
12446                scale: src_scale,
12447            },
12448            DataType::Numeric { precision, scale },
12449        ) => Some(numeric_rescale(
12450            scaled, src_scale, precision, scale, col_name,
12451        )?),
12452        #[allow(clippy::cast_precision_loss)]
12453        (Value::Numeric { scaled, scale }, DataType::Float) => {
12454            let mut div = 1.0_f64;
12455            for _ in 0..scale {
12456                div *= 10.0;
12457            }
12458            Some(Value::Float((scaled as f64) / div))
12459        }
12460        (Value::Numeric { scaled, scale }, DataType::Int) => {
12461            let truncated = numeric_truncate_to_integer(scaled, scale);
12462            i32::try_from(truncated).ok().map(Value::Int)
12463        }
12464        (Value::Numeric { scaled, scale }, DataType::BigInt) => {
12465            let truncated = numeric_truncate_to_integer(scaled, scale);
12466            i64::try_from(truncated).ok().map(Value::BigInt)
12467        }
12468        (Value::Numeric { scaled, scale }, DataType::SmallInt) => {
12469            let truncated = numeric_truncate_to_integer(scaled, scale);
12470            i16::try_from(truncated).ok().map(Value::SmallInt)
12471        }
12472        // VARCHAR(n) enforces an upper bound on character count.
12473        (Value::Text(s), DataType::Varchar(max)) => {
12474            if u32::try_from(s.chars().count()).unwrap_or(u32::MAX) <= max {
12475                Some(Value::Text(s))
12476            } else {
12477                return Err(EngineError::Unsupported(alloc::format!(
12478                    "value for VARCHAR({max}) column `{col_name}` exceeds length: \
12479                     {} chars",
12480                    s.chars().count()
12481                )));
12482            }
12483        }
12484        // v6.0.1: f32 → SQ8 INSERT-time quantisation. Triggered
12485        // when the column declares `VECTOR(N) USING SQ8` and
12486        // the INSERT VALUES expression yields a raw f32 vector
12487        // (the normal pgvector-shape literal). Dim mismatch
12488        // falls through the `_ => None` arm and surfaces as
12489        // `TypeMismatch` with the expected SQ8 column type —
12490        // matching the F32 path's existing error.
12491        (
12492            Value::Vector(v),
12493            DataType::Vector {
12494                dim,
12495                encoding: VecEncoding::Sq8,
12496            },
12497        ) if v.len() == dim as usize => Some(Value::Sq8Vector(spg_storage::quantize::quantize(&v))),
12498        // v6.0.3: f32 → f16 INSERT-time conversion for HALF
12499        // columns. Bit-exact at the storage layer (modulo
12500        // half-precision rounding); no rerank pass needed at
12501        // search time.
12502        (
12503            Value::Vector(v),
12504            DataType::Vector {
12505                dim,
12506                encoding: VecEncoding::F16,
12507            },
12508        ) if v.len() == dim as usize => Some(Value::HalfVector(
12509            spg_storage::halfvec::HalfVector::from_f32_slice(&v),
12510        )),
12511        // CHAR(n) right-pads with U+0020 to exactly n chars; if the input
12512        // is already longer we reject (PG truncates trailing-space-only;
12513        // staying strict for v1).
12514        (Value::Text(s), DataType::Char(size)) => {
12515            let len = u32::try_from(s.chars().count()).unwrap_or(u32::MAX);
12516            if len > size {
12517                return Err(EngineError::Unsupported(alloc::format!(
12518                    "value for CHAR({size}) column `{col_name}` exceeds length: \
12519                     {len} chars"
12520                )));
12521            }
12522            let need = (size - len) as usize;
12523            let mut padded = s;
12524            padded.reserve(need);
12525            for _ in 0..need {
12526                padded.push(' ');
12527            }
12528            Some(Value::Text(padded))
12529        }
12530        _ => None,
12531    };
12532    coerced.ok_or(EngineError::Storage(StorageError::TypeMismatch {
12533        column: col_name.into(),
12534        expected,
12535        actual,
12536        position,
12537    }))
12538}
12539
12540/// v7.12.4 — render a function arg list into the
12541/// canonical form the storage layer caches as
12542/// [`spg_storage::FunctionDef::args_repr`]. The catalogue uses
12543/// this string for both display + as a coarse signature key
12544/// for the (deferred) overload resolution v7.12.5+ adds.
12545fn render_function_args(args: &[spg_sql::ast::FunctionArg]) -> alloc::string::String {
12546    use core::fmt::Write;
12547    let mut out = alloc::string::String::from("(");
12548    for (i, a) in args.iter().enumerate() {
12549        if i > 0 {
12550            out.push_str(", ");
12551        }
12552        match a.mode {
12553            spg_sql::ast::FunctionArgMode::In => {}
12554            spg_sql::ast::FunctionArgMode::Out => out.push_str("OUT "),
12555            spg_sql::ast::FunctionArgMode::InOut => out.push_str("INOUT "),
12556        }
12557        if let Some(n) = &a.name {
12558            out.push_str(n);
12559            out.push(' ');
12560        }
12561        match &a.ty {
12562            spg_sql::ast::FunctionArgType::Typed(t) => {
12563                let _ = write!(out, "{t}");
12564            }
12565            spg_sql::ast::FunctionArgType::Raw(s) => out.push_str(s),
12566        }
12567    }
12568    out.push(')');
12569    out
12570}
12571
12572#[cfg(test)]
12573mod tests {
12574    use super::*;
12575    use alloc::vec;
12576
12577    fn unwrap_command_ok(r: &QueryResult) -> usize {
12578        match r {
12579            QueryResult::CommandOk { affected, .. } => *affected,
12580            QueryResult::Rows { .. } => panic!("expected CommandOk, got Rows"),
12581        }
12582    }
12583
12584    #[test]
12585    fn create_table_registers_schema() {
12586        let mut e = Engine::new();
12587        e.execute("CREATE TABLE foo (a INT NOT NULL, b TEXT)")
12588            .unwrap();
12589        assert_eq!(e.catalog().table_count(), 1);
12590        let t = e.catalog().get("foo").unwrap();
12591        assert_eq!(t.schema().columns.len(), 2);
12592        assert_eq!(t.schema().columns[0].ty, DataType::Int);
12593        assert!(!t.schema().columns[0].nullable);
12594        assert_eq!(t.schema().columns[1].ty, DataType::Text);
12595    }
12596
12597    #[test]
12598    fn create_table_vector_default_is_f32_encoded() {
12599        let mut e = Engine::new();
12600        e.execute("CREATE TABLE t (v VECTOR(8))").unwrap();
12601        let t = e.catalog().get("t").unwrap();
12602        assert_eq!(
12603            t.schema().columns[0].ty,
12604            DataType::Vector {
12605                dim: 8,
12606                encoding: VecEncoding::F32,
12607            },
12608        );
12609    }
12610
12611    #[test]
12612    fn create_table_vector_using_sq8_succeeds() {
12613        // v6.0.1 step 3: the step-1 fence in `column_def_to_schema`
12614        // is lifted. CREATE TABLE persists an SQ8 column type in
12615        // the catalog; INSERT (next test) quantises raw f32 input.
12616        let mut e = Engine::new();
12617        e.execute("CREATE TABLE t (v VECTOR(8) USING SQ8)").unwrap();
12618        let t = e.catalog().get("t").unwrap();
12619        assert_eq!(
12620            t.schema().columns[0].ty,
12621            DataType::Vector {
12622                dim: 8,
12623                encoding: VecEncoding::Sq8,
12624            },
12625        );
12626    }
12627
12628    #[test]
12629    fn insert_into_sq8_column_quantises_f32_payload() {
12630        // v6.0.1 step 3: INSERT-time `coerce_value` rewrites a raw
12631        // `Value::Vector(Vec<f32>)` literal into the column's
12632        // quantised representation. The row that lands in the
12633        // catalog must therefore hold a `Value::Sq8Vector`, not the
12634        // original f32 buffer — that's the bit that delivers the
12635        // 4× compression target.
12636        let mut e = Engine::new();
12637        e.execute("CREATE TABLE t (v VECTOR(4) USING SQ8)").unwrap();
12638        e.execute("INSERT INTO t VALUES ([0.0, 0.25, 0.5, 1.0])")
12639            .unwrap();
12640        let t = e.catalog().get("t").unwrap();
12641        assert_eq!(t.rows().len(), 1);
12642        match &t.rows()[0].values[0] {
12643            Value::Sq8Vector(q) => {
12644                assert_eq!(q.bytes.len(), 4);
12645                // min/max are derived from the payload: min=0.0, max=1.0.
12646                assert!((q.min - 0.0).abs() < 1e-6);
12647                assert!((q.max - 1.0).abs() < 1e-6);
12648            }
12649            other => panic!("expected Sq8Vector cell, got {other:?}"),
12650        }
12651    }
12652
12653    #[test]
12654    fn create_table_vector_using_half_succeeds_and_insert_converts_to_f16() {
12655        // v6.0.3: CREATE TABLE accepts USING HALF; INSERT path
12656        // converts the incoming `Value::Vector(Vec<f32>)` cell
12657        // into `Value::HalfVector(HalfVector)` via the new
12658        // `coerce_value` arm. The dequantised round-trip is
12659        // bit-exact for f16-representable values, so 0.0 / 0.25
12660        // / 0.5 / 1.0 hit their grid points exactly.
12661        let mut e = Engine::new();
12662        e.execute("CREATE TABLE t (v VECTOR(4) USING HALF)")
12663            .unwrap();
12664        e.execute("INSERT INTO t VALUES ([0.0, 0.25, 0.5, 1.0])")
12665            .unwrap();
12666        let t = e.catalog().get("t").unwrap();
12667        assert_eq!(t.rows().len(), 1);
12668        match &t.rows()[0].values[0] {
12669            Value::HalfVector(h) => {
12670                assert_eq!(h.dim(), 4);
12671                let back = h.to_f32_vec();
12672                let expected = alloc::vec![0.0_f32, 0.25, 0.5, 1.0];
12673                for (g, e) in back.iter().zip(expected.iter()) {
12674                    assert!(
12675                        (g - e).abs() < 1e-6,
12676                        "{g} vs {e} should be exact on f16 grid"
12677                    );
12678                }
12679            }
12680            other => panic!("expected HalfVector cell, got {other:?}"),
12681        }
12682    }
12683
12684    #[test]
12685    fn alter_index_rebuild_in_place_succeeds() {
12686        // v6.0.4: bare REBUILD (no encoding switch) walks every
12687        // row again to rebuild the NSW graph. Verifies the engine
12688        // dispatch + storage helper plumbing without changing any
12689        // cell encoding.
12690        let mut e = Engine::new();
12691        e.execute("CREATE TABLE t (id INT NOT NULL, v VECTOR(3) NOT NULL)")
12692            .unwrap();
12693        for i in 0..8_i32 {
12694            #[allow(clippy::cast_precision_loss)]
12695            let base = (i as f32) * 0.1;
12696            e.execute(&alloc::format!(
12697                "INSERT INTO t VALUES ({i}, [{base}, {b1}, {b2}])",
12698                b1 = base + 0.01,
12699                b2 = base + 0.02,
12700            ))
12701            .unwrap();
12702        }
12703        e.execute("CREATE INDEX t_idx ON t USING hnsw (v)").unwrap();
12704        e.execute("ALTER INDEX t_idx REBUILD").unwrap();
12705        // Schema encoding stays F32 (no encoding clause).
12706        assert_eq!(
12707            e.catalog().get("t").unwrap().schema().columns[1].ty,
12708            DataType::Vector {
12709                dim: 3,
12710                encoding: VecEncoding::F32,
12711            },
12712        );
12713    }
12714
12715    #[test]
12716    fn alter_index_rebuild_with_encoding_switches_cell_type() {
12717        // v6.0.4: REBUILD WITH (encoding = SQ8) recodes every
12718        // stored cell from F32 → SQ8 + rebuilds the graph atop the
12719        // new encoding. Post-rebuild, cells must be Sq8Vector and
12720        // the schema must report encoding = Sq8.
12721        let mut e = Engine::new();
12722        e.execute("CREATE TABLE t (id INT NOT NULL, v VECTOR(4) NOT NULL)")
12723            .unwrap();
12724        e.execute("INSERT INTO t VALUES (1, [0.0, 0.25, 0.5, 1.0])")
12725            .unwrap();
12726        e.execute("CREATE INDEX t_idx ON t USING hnsw (v)").unwrap();
12727        e.execute("ALTER INDEX t_idx REBUILD WITH (encoding = SQ8)")
12728            .unwrap();
12729        let t = e.catalog().get("t").unwrap();
12730        assert_eq!(
12731            t.schema().columns[1].ty,
12732            DataType::Vector {
12733                dim: 4,
12734                encoding: VecEncoding::Sq8,
12735            },
12736        );
12737        assert!(matches!(t.rows()[0].values[1], Value::Sq8Vector(_)));
12738    }
12739
12740    #[test]
12741    fn alter_index_rebuild_unknown_index_errors() {
12742        let mut e = Engine::new();
12743        let err = e.execute("ALTER INDEX nope REBUILD").unwrap_err();
12744        assert!(
12745            matches!(
12746                &err,
12747                EngineError::Storage(StorageError::IndexNotFound { name }) if name == "nope"
12748            ),
12749            "got: {err}"
12750        );
12751    }
12752
12753    #[test]
12754    fn alter_index_rebuild_on_btree_index_errors() {
12755        // REBUILD on a B-tree index has no semantic meaning in
12756        // v6.0.4 — rejected at the storage layer with `Unsupported`.
12757        let mut e = Engine::new();
12758        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
12759        e.execute("INSERT INTO t VALUES (1)").unwrap();
12760        e.execute("CREATE INDEX t_idx ON t (id)").unwrap();
12761        let err = e.execute("ALTER INDEX t_idx REBUILD").unwrap_err();
12762        assert!(
12763            matches!(&err, EngineError::Storage(StorageError::Unsupported(_))),
12764            "got: {err}"
12765        );
12766    }
12767
12768    #[test]
12769    fn prepared_insert_substitutes_placeholders() {
12770        // v6.1.1: prepare() parses once; execute_prepared() walks the
12771        // AST and replaces $1/$2 with the param Values BEFORE the
12772        // dispatch sees them. Same logical result as a simple-query
12773        // INSERT, but parse happens once per *statement*, not per
12774        // execution.
12775        let mut e = Engine::new();
12776        e.execute("CREATE TABLE t (id INT NOT NULL, name TEXT NOT NULL)")
12777            .unwrap();
12778        let stmt = e.prepare("INSERT INTO t VALUES ($1, $2)").unwrap();
12779        for (id, name) in [(1, "alice"), (2, "bob"), (3, "carol")] {
12780            e.execute_prepared(stmt.clone(), &[Value::Int(id), Value::Text(name.into())])
12781                .unwrap();
12782        }
12783        // Read back via simple-query SELECT.
12784        let rows_result = e.execute("SELECT id, name FROM t").unwrap();
12785        let QueryResult::Rows { rows, .. } = rows_result else {
12786            panic!("expected Rows")
12787        };
12788        assert_eq!(rows.len(), 3);
12789    }
12790
12791    #[test]
12792    fn prepared_select_with_placeholder_filters_rows() {
12793        let mut e = Engine::new();
12794        e.execute("CREATE TABLE t (id INT NOT NULL, v INT NOT NULL)")
12795            .unwrap();
12796        for i in 0..10_i32 {
12797            e.execute(&alloc::format!("INSERT INTO t VALUES ({i}, {})", i * 7))
12798                .unwrap();
12799        }
12800        let stmt = e.prepare("SELECT id FROM t WHERE v = $1").unwrap();
12801        let QueryResult::Rows { rows, .. } = e.execute_prepared(stmt, &[Value::Int(35)]).unwrap()
12802        else {
12803            panic!("expected Rows")
12804        };
12805        // v = 35 means i*7 = 35 → i = 5.
12806        assert_eq!(rows.len(), 1);
12807        assert_eq!(rows[0].values[0], Value::Int(5));
12808    }
12809
12810    #[test]
12811    fn prepared_too_few_params_errors() {
12812        let mut e = Engine::new();
12813        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
12814        let stmt = e.prepare("INSERT INTO t VALUES ($1)").unwrap();
12815        let err = e.execute_prepared(stmt, &[]).unwrap_err();
12816        assert!(
12817            matches!(
12818                &err,
12819                EngineError::Eval(EvalError::PlaceholderOutOfRange { n: 1, bound: 0 })
12820            ),
12821            "got: {err}"
12822        );
12823    }
12824
12825    #[test]
12826    fn insert_into_half_column_dim_mismatch_errors() {
12827        let mut e = Engine::new();
12828        e.execute("CREATE TABLE t (v VECTOR(4) USING HALF)")
12829            .unwrap();
12830        let err = e.execute("INSERT INTO t VALUES ([1.0, 2.0])").unwrap_err();
12831        assert!(matches!(
12832            &err,
12833            EngineError::Storage(StorageError::TypeMismatch { .. })
12834        ));
12835    }
12836
12837    #[test]
12838    fn insert_into_sq8_column_dim_mismatch_errors() {
12839        // Dim mismatch falls through the `coerce_value` Vector→Sq8
12840        // arm's guard and surfaces as `TypeMismatch` — the same
12841        // error the F32 path produces today, so client error
12842        // handling stays uniform across encodings.
12843        let mut e = Engine::new();
12844        e.execute("CREATE TABLE t (v VECTOR(4) USING SQ8)").unwrap();
12845        let err = e.execute("INSERT INTO t VALUES ([1.0, 2.0])").unwrap_err();
12846        assert!(
12847            matches!(
12848                &err,
12849                EngineError::Storage(StorageError::TypeMismatch { .. })
12850            ),
12851            "got: {err}",
12852        );
12853    }
12854
12855    #[test]
12856    fn create_table_duplicate_errors() {
12857        let mut e = Engine::new();
12858        e.execute("CREATE TABLE foo (a INT)").unwrap();
12859        let err = e.execute("CREATE TABLE foo (a INT)").unwrap_err();
12860        assert!(matches!(
12861            err,
12862            EngineError::Storage(StorageError::DuplicateTable { ref name }) if name == "foo"
12863        ));
12864    }
12865
12866    #[test]
12867    fn insert_into_unknown_table_errors() {
12868        let mut e = Engine::new();
12869        let err = e.execute("INSERT INTO ghost VALUES (1)").unwrap_err();
12870        assert!(matches!(
12871            err,
12872            EngineError::Storage(StorageError::TableNotFound { ref name }) if name == "ghost"
12873        ));
12874    }
12875
12876    #[test]
12877    fn insert_happy_path_reports_one_affected() {
12878        let mut e = Engine::new();
12879        e.execute("CREATE TABLE foo (a INT NOT NULL)").unwrap();
12880        let r = e.execute("INSERT INTO foo VALUES (42)").unwrap();
12881        assert_eq!(unwrap_command_ok(&r), 1);
12882        assert_eq!(e.catalog().get("foo").unwrap().row_count(), 1);
12883    }
12884
12885    #[test]
12886    fn insert_arity_mismatch_propagates() {
12887        let mut e = Engine::new();
12888        e.execute("CREATE TABLE foo (a INT, b TEXT)").unwrap();
12889        let err = e.execute("INSERT INTO foo VALUES (1)").unwrap_err();
12890        assert!(matches!(
12891            err,
12892            EngineError::Storage(StorageError::ArityMismatch { .. })
12893        ));
12894    }
12895
12896    #[test]
12897    fn insert_negative_integer_via_unary_minus() {
12898        let mut e = Engine::new();
12899        e.execute("CREATE TABLE foo (a INT NOT NULL)").unwrap();
12900        e.execute("INSERT INTO foo VALUES (-7)").unwrap();
12901        let rows = e.catalog().get("foo").unwrap().rows();
12902        assert_eq!(rows[0].values[0], Value::Int(-7));
12903    }
12904
12905    #[test]
12906    fn insert_non_literal_expr_unsupported() {
12907        let mut e = Engine::new();
12908        e.execute("CREATE TABLE foo (a INT NOT NULL)").unwrap();
12909        let err = e.execute("INSERT INTO foo VALUES (1 + 2)").unwrap_err();
12910        assert!(matches!(err, EngineError::Unsupported(_)));
12911    }
12912
12913    #[test]
12914    fn select_star_returns_all_rows_in_insertion_order() {
12915        let mut e = Engine::new();
12916        e.execute("CREATE TABLE foo (a INT NOT NULL, b TEXT NOT NULL)")
12917            .unwrap();
12918        e.execute("INSERT INTO foo VALUES (1, 'one')").unwrap();
12919        e.execute("INSERT INTO foo VALUES (2, 'two')").unwrap();
12920        e.execute("INSERT INTO foo VALUES (3, 'three')").unwrap();
12921
12922        let r = e.execute("SELECT * FROM foo").unwrap();
12923        let QueryResult::Rows { columns, rows } = r else {
12924            panic!("expected Rows")
12925        };
12926        assert_eq!(columns.len(), 2);
12927        assert_eq!(columns[0].name, "a");
12928        assert_eq!(rows.len(), 3);
12929        assert_eq!(
12930            rows[1].values,
12931            vec![Value::Int(2), Value::Text("two".into())]
12932        );
12933    }
12934
12935    #[test]
12936    fn select_star_on_empty_table_returns_zero_rows() {
12937        let mut e = Engine::new();
12938        e.execute("CREATE TABLE foo (a INT)").unwrap();
12939        let r = e.execute("SELECT * FROM foo").unwrap();
12940        match r {
12941            QueryResult::Rows { rows, .. } => assert!(rows.is_empty()),
12942            QueryResult::CommandOk { .. } => panic!("expected Rows"),
12943        }
12944    }
12945
12946    // --- v0.4: WHERE + projection ------------------------------------------
12947
12948    fn make_three_row_users(e: &mut Engine) {
12949        e.execute("CREATE TABLE users (id INT NOT NULL, name TEXT NOT NULL, score INT)")
12950            .unwrap();
12951        e.execute("INSERT INTO users VALUES (1, 'alice', 90)")
12952            .unwrap();
12953        e.execute("INSERT INTO users VALUES (2, 'bob', NULL)")
12954            .unwrap();
12955        e.execute("INSERT INTO users VALUES (3, 'cara', 70)")
12956            .unwrap();
12957    }
12958
12959    fn unwrap_rows(r: QueryResult) -> (Vec<ColumnSchema>, Vec<Row>) {
12960        match r {
12961            QueryResult::Rows { columns, rows } => (columns, rows),
12962            QueryResult::CommandOk { .. } => panic!("expected Rows"),
12963        }
12964    }
12965
12966    #[test]
12967    fn where_filter_passes_only_true_rows() {
12968        let mut e = Engine::new();
12969        make_three_row_users(&mut e);
12970        let r = e.execute("SELECT * FROM users WHERE id > 1").unwrap();
12971        let (_, rows) = unwrap_rows(r);
12972        assert_eq!(rows.len(), 2);
12973        assert_eq!(rows[0].values[0], Value::Int(2));
12974        assert_eq!(rows[1].values[0], Value::Int(3));
12975    }
12976
12977    #[test]
12978    fn where_with_null_result_filters_out_row() {
12979        let mut e = Engine::new();
12980        make_three_row_users(&mut e);
12981        // score is NULL for bob → score > 80 is NULL → row excluded
12982        let r = e.execute("SELECT * FROM users WHERE score > 80").unwrap();
12983        let (_, rows) = unwrap_rows(r);
12984        assert_eq!(rows.len(), 1);
12985        assert_eq!(rows[0].values[1], Value::Text("alice".into()));
12986    }
12987
12988    #[test]
12989    fn projection_named_columns() {
12990        let mut e = Engine::new();
12991        make_three_row_users(&mut e);
12992        let r = e.execute("SELECT name, score FROM users").unwrap();
12993        let (cols, rows) = unwrap_rows(r);
12994        assert_eq!(cols.len(), 2);
12995        assert_eq!(cols[0].name, "name");
12996        assert_eq!(cols[1].name, "score");
12997        assert_eq!(rows.len(), 3);
12998        assert_eq!(
12999            rows[0].values,
13000            vec![Value::Text("alice".into()), Value::Int(90)]
13001        );
13002    }
13003
13004    #[test]
13005    fn projection_with_column_alias() {
13006        let mut e = Engine::new();
13007        make_three_row_users(&mut e);
13008        let r = e
13009            .execute("SELECT name AS who FROM users WHERE id = 1")
13010            .unwrap();
13011        let (cols, rows) = unwrap_rows(r);
13012        assert_eq!(cols[0].name, "who");
13013        assert_eq!(rows.len(), 1);
13014        assert_eq!(rows[0].values[0], Value::Text("alice".into()));
13015    }
13016
13017    #[test]
13018    fn qualified_column_with_table_alias_resolves() {
13019        let mut e = Engine::new();
13020        make_three_row_users(&mut e);
13021        let r = e
13022            .execute("SELECT u.id, u.name FROM users AS u WHERE u.id < 3")
13023            .unwrap();
13024        let (cols, rows) = unwrap_rows(r);
13025        assert_eq!(cols.len(), 2);
13026        assert_eq!(rows.len(), 2);
13027    }
13028
13029    #[test]
13030    fn qualified_column_with_wrong_alias_errors() {
13031        let mut e = Engine::new();
13032        make_three_row_users(&mut e);
13033        let err = e.execute("SELECT x.id FROM users AS u").unwrap_err();
13034        assert!(matches!(
13035            err,
13036            EngineError::Eval(EvalError::UnknownQualifier { ref qualifier }) if qualifier == "x"
13037        ));
13038    }
13039
13040    #[test]
13041    fn select_unknown_column_errors_in_projection() {
13042        let mut e = Engine::new();
13043        make_three_row_users(&mut e);
13044        let err = e.execute("SELECT ghost FROM users").unwrap_err();
13045        assert!(matches!(
13046            err,
13047            EngineError::Eval(EvalError::ColumnNotFound { ref name }) if name == "ghost"
13048        ));
13049    }
13050
13051    #[test]
13052    fn where_unknown_column_errors() {
13053        let mut e = Engine::new();
13054        make_three_row_users(&mut e);
13055        let err = e
13056            .execute("SELECT * FROM users WHERE ghost = 1")
13057            .unwrap_err();
13058        assert!(matches!(
13059            err,
13060            EngineError::Eval(EvalError::ColumnNotFound { .. })
13061        ));
13062    }
13063
13064    #[test]
13065    fn expression_projection_evaluates_and_renders() {
13066        // Compound expressions in the SELECT list are evaluated per row;
13067        // the output column is typed TEXT, name defaults to the expression.
13068        let mut e = Engine::new();
13069        e.execute("CREATE TABLE t (a INT NOT NULL)").unwrap();
13070        e.execute("INSERT INTO t VALUES (3)").unwrap();
13071        let (_, rows) = unwrap_rows(e.execute("SELECT 1 + 2 FROM t").unwrap());
13072        assert_eq!(rows.len(), 1);
13073        // The expression evaluates to integer 3; rendered as the cell value
13074        // (storage::Value::Int(3) since arithmetic kept ints).
13075        assert_eq!(rows[0].values[0], Value::Int(3));
13076    }
13077
13078    #[test]
13079    fn select_unknown_table_errors() {
13080        let mut e = Engine::new();
13081        let err = e.execute("SELECT * FROM ghost").unwrap_err();
13082        assert!(matches!(
13083            err,
13084            EngineError::Storage(StorageError::TableNotFound { .. })
13085        ));
13086    }
13087
13088    #[test]
13089    fn invalid_sql_returns_parse_error() {
13090        // v4.4: UPDATE is now real SQL, so use a true syntactic
13091        // garbage payload for the parse-error path.
13092        let mut e = Engine::new();
13093        let err = e.execute("THIS_IS_NOT_A_KEYWORD foo bar baz").unwrap_err();
13094        assert!(matches!(err, EngineError::Parse(_)));
13095    }
13096
13097    // --- v0.8 CREATE INDEX + index seek ------------------------------------
13098
13099    #[test]
13100    fn create_index_registers_on_table() {
13101        let mut e = Engine::new();
13102        make_three_row_users(&mut e);
13103        e.execute("CREATE INDEX by_name ON users (name)").unwrap();
13104        let t = e.catalog().get("users").unwrap();
13105        assert_eq!(t.indices().len(), 1);
13106        assert_eq!(t.indices()[0].name, "by_name");
13107    }
13108
13109    #[test]
13110    fn create_index_on_unknown_table_errors() {
13111        let mut e = Engine::new();
13112        let err = e.execute("CREATE INDEX i ON ghost (a)").unwrap_err();
13113        assert!(matches!(
13114            err,
13115            EngineError::Storage(StorageError::TableNotFound { .. })
13116        ));
13117    }
13118
13119    #[test]
13120    fn create_index_on_unknown_column_errors() {
13121        let mut e = Engine::new();
13122        make_three_row_users(&mut e);
13123        let err = e.execute("CREATE INDEX i ON users (ghost)").unwrap_err();
13124        assert!(matches!(
13125            err,
13126            EngineError::Storage(StorageError::ColumnNotFound { .. })
13127        ));
13128    }
13129
13130    #[test]
13131    fn select_eq_uses_index_returns_same_rows_as_scan() {
13132        // Build two engines: one with an index, one without. Same query →
13133        // same row set (index is a planner optimisation, not a semantic
13134        // change).
13135        let mut without = Engine::new();
13136        make_three_row_users(&mut without);
13137        let mut with = Engine::new();
13138        make_three_row_users(&mut with);
13139        with.execute("CREATE INDEX by_id ON users (id)").unwrap();
13140
13141        let q = "SELECT * FROM users WHERE id = 2";
13142        let (_, no_idx_rows) = unwrap_rows(without.execute(q).unwrap());
13143        let (_, idx_rows) = unwrap_rows(with.execute(q).unwrap());
13144        assert_eq!(no_idx_rows, idx_rows);
13145        assert_eq!(idx_rows.len(), 1);
13146    }
13147
13148    #[test]
13149    fn select_eq_with_no_matching_index_value_returns_empty() {
13150        let mut e = Engine::new();
13151        make_three_row_users(&mut e);
13152        e.execute("CREATE INDEX by_id ON users (id)").unwrap();
13153        let (_, rows) = unwrap_rows(e.execute("SELECT * FROM users WHERE id = 999").unwrap());
13154        assert_eq!(rows.len(), 0);
13155    }
13156
13157    // --- v0.9 transactions -------------------------------------------------
13158
13159    #[test]
13160    fn begin_sets_in_transaction_flag() {
13161        let mut e = Engine::new();
13162        assert!(!e.in_transaction());
13163        e.execute("BEGIN").unwrap();
13164        assert!(e.in_transaction());
13165    }
13166
13167    #[test]
13168    fn double_begin_errors() {
13169        let mut e = Engine::new();
13170        e.execute("BEGIN").unwrap();
13171        let err = e.execute("BEGIN").unwrap_err();
13172        assert_eq!(err, EngineError::TransactionAlreadyOpen);
13173    }
13174
13175    #[test]
13176    fn commit_without_begin_errors() {
13177        let mut e = Engine::new();
13178        let err = e.execute("COMMIT").unwrap_err();
13179        assert_eq!(err, EngineError::NoActiveTransaction);
13180    }
13181
13182    #[test]
13183    fn rollback_without_begin_errors() {
13184        let mut e = Engine::new();
13185        let err = e.execute("ROLLBACK").unwrap_err();
13186        assert_eq!(err, EngineError::NoActiveTransaction);
13187    }
13188
13189    #[test]
13190    fn commit_applies_shadow_to_committed_catalog() {
13191        let mut e = Engine::new();
13192        e.execute("CREATE TABLE t (v INT NOT NULL)").unwrap();
13193        e.execute("BEGIN").unwrap();
13194        e.execute("INSERT INTO t VALUES (1)").unwrap();
13195        e.execute("INSERT INTO t VALUES (2)").unwrap();
13196        e.execute("COMMIT").unwrap();
13197        assert!(!e.in_transaction());
13198        assert_eq!(e.catalog().get("t").unwrap().row_count(), 2);
13199    }
13200
13201    #[test]
13202    fn rollback_discards_shadow() {
13203        let mut e = Engine::new();
13204        e.execute("CREATE TABLE t (v INT NOT NULL)").unwrap();
13205        e.execute("BEGIN").unwrap();
13206        e.execute("INSERT INTO t VALUES (1)").unwrap();
13207        e.execute("INSERT INTO t VALUES (2)").unwrap();
13208        e.execute("ROLLBACK").unwrap();
13209        assert!(!e.in_transaction());
13210        assert_eq!(e.catalog().get("t").unwrap().row_count(), 0);
13211    }
13212
13213    #[test]
13214    fn select_during_tx_sees_uncommitted_writes_own_session() {
13215        // The shadow catalog is read by SELECTs while a TX is open — the
13216        // session can see its own pending writes.
13217        let mut e = Engine::new();
13218        e.execute("CREATE TABLE t (v INT NOT NULL)").unwrap();
13219        e.execute("BEGIN").unwrap();
13220        e.execute("INSERT INTO t VALUES (42)").unwrap();
13221        let (_, rows) = unwrap_rows(e.execute("SELECT * FROM t").unwrap());
13222        assert_eq!(rows.len(), 1);
13223        assert_eq!(rows[0].values[0], Value::Int(42));
13224    }
13225
13226    #[test]
13227    fn snapshot_with_no_users_is_bare_catalog_format() {
13228        let mut e = Engine::new();
13229        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
13230        let bytes = e.snapshot();
13231        assert_eq!(
13232            &bytes[..8],
13233            b"SPGDB001",
13234            "must be the bare v3.x catalog magic"
13235        );
13236        let e2 = Engine::restore_envelope(&bytes).unwrap();
13237        assert!(e2.users().is_empty());
13238        assert_eq!(e2.catalog().table_count(), 1);
13239    }
13240
13241    #[test]
13242    fn snapshot_with_users_round_trips_both_via_envelope() {
13243        let mut e = Engine::new();
13244        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
13245        e.create_user("alice", "pw1", Role::Admin, [9; 16]).unwrap();
13246        e.create_user("bob", "pw2", Role::ReadOnly, [5; 16])
13247            .unwrap();
13248        let bytes = e.snapshot();
13249        assert_eq!(&bytes[..8], b"SPGENV01", "must be the v4.1 envelope magic");
13250        let e2 = Engine::restore_envelope(&bytes).unwrap();
13251        assert_eq!(e2.users().len(), 2);
13252        assert_eq!(e2.verify_user("alice", "pw1"), Some(Role::Admin));
13253        assert_eq!(e2.verify_user("bob", "pw2"), Some(Role::ReadOnly));
13254        assert_eq!(e2.verify_user("alice", "wrong"), None);
13255        assert_eq!(e2.catalog().table_count(), 1);
13256    }
13257
13258    #[test]
13259    fn ddl_inside_tx_also_rolled_back() {
13260        let mut e = Engine::new();
13261        e.execute("BEGIN").unwrap();
13262        e.execute("CREATE TABLE t (v INT)").unwrap();
13263        // Visible inside the TX.
13264        e.execute("SELECT * FROM t").unwrap();
13265        e.execute("ROLLBACK").unwrap();
13266        // Gone after rollback.
13267        let err = e.execute("SELECT * FROM t").unwrap_err();
13268        assert!(matches!(
13269            err,
13270            EngineError::Storage(StorageError::TableNotFound { .. })
13271        ));
13272    }
13273
13274    // ── v6.1.2: CREATE / DROP PUBLICATION (engine-side) ──────
13275
13276    #[test]
13277    fn create_publication_lands_in_catalog() {
13278        let mut e = Engine::new();
13279        assert!(e.publications().is_empty());
13280        e.execute("CREATE PUBLICATION pub_a").unwrap();
13281        assert_eq!(e.publications().len(), 1);
13282        assert!(e.publications().contains("pub_a"));
13283    }
13284
13285    #[test]
13286    fn create_publication_duplicate_errors() {
13287        let mut e = Engine::new();
13288        e.execute("CREATE PUBLICATION pub_a").unwrap();
13289        let err = e.execute("CREATE PUBLICATION pub_a").unwrap_err();
13290        assert!(
13291            alloc::format!("{err:?}").contains("DuplicateName"),
13292            "got {err:?}"
13293        );
13294    }
13295
13296    #[test]
13297    fn drop_publication_silent_when_absent() {
13298        let mut e = Engine::new();
13299        // PG-compatible: DROP a publication that doesn't exist
13300        // succeeds (no-op) but reports zero affected.
13301        let r = e.execute("DROP PUBLICATION nope").unwrap();
13302        match r {
13303            QueryResult::CommandOk { affected, .. } => assert_eq!(affected, 0),
13304            other => panic!("expected CommandOk, got {other:?}"),
13305        }
13306    }
13307
13308    #[test]
13309    fn drop_publication_present_reports_one_affected() {
13310        let mut e = Engine::new();
13311        e.execute("CREATE PUBLICATION pub_a").unwrap();
13312        let r = e.execute("DROP PUBLICATION pub_a").unwrap();
13313        match r {
13314            QueryResult::CommandOk {
13315                affected,
13316                modified_catalog,
13317            } => {
13318                assert_eq!(affected, 1);
13319                assert!(modified_catalog);
13320            }
13321            other => panic!("expected CommandOk, got {other:?}"),
13322        }
13323        assert!(e.publications().is_empty());
13324    }
13325
13326    #[test]
13327    fn publications_persist_across_snapshot_restore() {
13328        // The persist-across-restart ship-gate at the engine layer —
13329        // snapshot → restore_envelope round trip must preserve the
13330        // publication catalog. The spg-server e2e covers the
13331        // process-restart variant.
13332        let mut e = Engine::new();
13333        e.execute("CREATE PUBLICATION pub_a").unwrap();
13334        e.execute("CREATE PUBLICATION pub_b FOR ALL TABLES")
13335            .unwrap();
13336        let snap = e.snapshot();
13337        let e2 = Engine::restore_envelope(&snap).unwrap();
13338        assert_eq!(e2.publications().len(), 2);
13339        assert!(e2.publications().contains("pub_a"));
13340        assert!(e2.publications().contains("pub_b"));
13341    }
13342
13343    #[test]
13344    fn create_publication_allowed_inside_transaction() {
13345        // v6.1.4 dropped the v6.1.2 in-TX guard — PG allows
13346        // CREATE PUBLICATION inside a TX and the auto-commit
13347        // wrap path needs the same allowance.
13348        let mut e = Engine::new();
13349        e.execute("BEGIN").unwrap();
13350        e.execute("CREATE PUBLICATION pub_a").unwrap();
13351        e.execute("COMMIT").unwrap();
13352        assert!(e.publications().contains("pub_a"));
13353    }
13354
13355    // ── v6.1.3: SHOW PUBLICATIONS + FOR-list variants ───────
13356
13357    #[test]
13358    fn create_publication_for_table_list_lands_with_scope() {
13359        let mut e = Engine::new();
13360        e.execute("CREATE TABLE t1 (id INT NOT NULL)").unwrap();
13361        e.execute("CREATE TABLE t2 (id INT NOT NULL)").unwrap();
13362        e.execute("CREATE PUBLICATION pub_a FOR TABLE t1, t2")
13363            .unwrap();
13364        let scope = e.publications().get("pub_a").cloned();
13365        let Some(spg_sql::ast::PublicationScope::ForTables(ts)) = scope else {
13366            panic!("expected ForTables scope, got {scope:?}")
13367        };
13368        assert_eq!(ts, alloc::vec!["t1".to_string(), "t2".to_string()]);
13369    }
13370
13371    #[test]
13372    fn create_publication_all_tables_except_lands_with_scope() {
13373        let mut e = Engine::new();
13374        e.execute("CREATE PUBLICATION pub_a FOR ALL TABLES EXCEPT t3")
13375            .unwrap();
13376        let scope = e.publications().get("pub_a").cloned();
13377        let Some(spg_sql::ast::PublicationScope::AllTablesExcept(ts)) = scope else {
13378            panic!("expected AllTablesExcept scope, got {scope:?}")
13379        };
13380        assert_eq!(ts, alloc::vec!["t3".to_string()]);
13381    }
13382
13383    #[test]
13384    fn show_publications_empty_returns_zero_rows() {
13385        let e = Engine::new();
13386        let r = e.execute_readonly("SHOW PUBLICATIONS").unwrap();
13387        let QueryResult::Rows { rows, columns } = r else {
13388            panic!()
13389        };
13390        assert!(rows.is_empty());
13391        assert_eq!(columns.len(), 3);
13392        assert_eq!(columns[0].name, "name");
13393        assert_eq!(columns[1].name, "scope");
13394        assert_eq!(columns[2].name, "table_count");
13395    }
13396
13397    #[test]
13398    fn show_publications_returns_one_row_per_publication_ordered_by_name() {
13399        let mut e = Engine::new();
13400        e.execute("CREATE PUBLICATION z_pub").unwrap();
13401        e.execute("CREATE PUBLICATION a_pub FOR TABLE t1, t2")
13402            .unwrap();
13403        e.execute("CREATE PUBLICATION m_pub FOR ALL TABLES EXCEPT bad")
13404            .unwrap();
13405        let r = e.execute_readonly("SHOW PUBLICATIONS").unwrap();
13406        let QueryResult::Rows { rows, .. } = r else {
13407            panic!()
13408        };
13409        assert_eq!(rows.len(), 3);
13410        // Alphabetical order: a_pub, m_pub, z_pub.
13411        let names: Vec<&str> = rows
13412            .iter()
13413            .map(|r| {
13414                if let Value::Text(s) = &r.values[0] {
13415                    s.as_str()
13416                } else {
13417                    panic!()
13418                }
13419            })
13420            .collect();
13421        assert_eq!(names, alloc::vec!["a_pub", "m_pub", "z_pub"]);
13422        // Row 0 — a_pub scope summary + table_count = 2.
13423        match &rows[0].values[1] {
13424            Value::Text(s) => assert_eq!(s, "FOR TABLE t1, t2"),
13425            other => panic!("expected Text, got {other:?}"),
13426        }
13427        assert_eq!(rows[0].values[2], Value::Int(2));
13428        // Row 1 — m_pub.
13429        match &rows[1].values[1] {
13430            Value::Text(s) => assert_eq!(s, "FOR ALL TABLES EXCEPT bad"),
13431            other => panic!("expected Text, got {other:?}"),
13432        }
13433        assert_eq!(rows[1].values[2], Value::Int(1));
13434        // Row 2 — z_pub (AllTables → NULL count).
13435        match &rows[2].values[1] {
13436            Value::Text(s) => assert_eq!(s, "FOR ALL TABLES"),
13437            other => panic!("expected Text, got {other:?}"),
13438        }
13439        assert_eq!(rows[2].values[2], Value::Null);
13440    }
13441
13442    #[test]
13443    fn for_list_scopes_persist_across_snapshot() {
13444        // The v6.1.2 envelope-v3 round-trip exercised AllTables;
13445        // v6.1.3 needs the scope-1 / scope-2 tags to survive too.
13446        let mut e = Engine::new();
13447        e.execute("CREATE PUBLICATION p1 FOR TABLE t1, t2").unwrap();
13448        e.execute("CREATE PUBLICATION p2 FOR ALL TABLES EXCEPT bad, worse")
13449            .unwrap();
13450        let snap = e.snapshot();
13451        let e2 = Engine::restore_envelope(&snap).unwrap();
13452        assert_eq!(e2.publications().len(), 2);
13453        let p1 = e2.publications().get("p1").cloned();
13454        let Some(spg_sql::ast::PublicationScope::ForTables(ts)) = p1 else {
13455            panic!("p1 scope lost: {p1:?}")
13456        };
13457        assert_eq!(ts, alloc::vec!["t1".to_string(), "t2".to_string()]);
13458        let p2 = e2.publications().get("p2").cloned();
13459        let Some(spg_sql::ast::PublicationScope::AllTablesExcept(ts)) = p2 else {
13460            panic!("p2 scope lost: {p2:?}")
13461        };
13462        assert_eq!(ts, alloc::vec!["bad".to_string(), "worse".to_string()]);
13463    }
13464
13465    // ── v6.1.4: CREATE / DROP SUBSCRIPTION + SHOW + envelope v4 ─
13466
13467    #[test]
13468    fn create_subscription_lands_in_catalog_with_defaults() {
13469        let mut e = Engine::new();
13470        e.execute(
13471            "CREATE SUBSCRIPTION sub_a CONNECTION 'host=127.0.0.1 port=20002' PUBLICATION pub_a",
13472        )
13473        .unwrap();
13474        let s = e.subscriptions().get("sub_a").cloned().expect("present");
13475        assert_eq!(s.conn_str, "host=127.0.0.1 port=20002");
13476        assert_eq!(s.publications, alloc::vec!["pub_a".to_string()]);
13477        assert!(s.enabled);
13478        assert_eq!(s.last_received_pos, 0);
13479    }
13480
13481    #[test]
13482    fn create_subscription_duplicate_name_errors() {
13483        let mut e = Engine::new();
13484        e.execute("CREATE SUBSCRIPTION s CONNECTION 'host=x' PUBLICATION p")
13485            .unwrap();
13486        let err = e
13487            .execute("CREATE SUBSCRIPTION s CONNECTION 'host=y' PUBLICATION p")
13488            .unwrap_err();
13489        assert!(
13490            alloc::format!("{err:?}").contains("DuplicateName"),
13491            "got {err:?}"
13492        );
13493    }
13494
13495    #[test]
13496    fn drop_subscription_silent_when_absent() {
13497        let mut e = Engine::new();
13498        let r = e.execute("DROP SUBSCRIPTION never").unwrap();
13499        match r {
13500            QueryResult::CommandOk { affected, .. } => assert_eq!(affected, 0),
13501            other => panic!("expected CommandOk, got {other:?}"),
13502        }
13503    }
13504
13505    #[test]
13506    fn subscription_advance_updates_last_pos_monotone() {
13507        let mut e = Engine::new();
13508        e.execute("CREATE SUBSCRIPTION s CONNECTION 'h=x' PUBLICATION p")
13509            .unwrap();
13510        assert!(e.subscription_advance("s", 100));
13511        assert_eq!(e.subscriptions().get("s").unwrap().last_received_pos, 100);
13512        assert!(e.subscription_advance("s", 50)); // stale → ignored
13513        assert_eq!(e.subscriptions().get("s").unwrap().last_received_pos, 100);
13514        assert!(e.subscription_advance("s", 200));
13515        assert_eq!(e.subscriptions().get("s").unwrap().last_received_pos, 200);
13516        assert!(!e.subscription_advance("missing", 1));
13517    }
13518
13519    #[test]
13520    fn show_subscriptions_returns_rows_ordered_by_name() {
13521        let mut e = Engine::new();
13522        e.execute("CREATE SUBSCRIPTION z_sub CONNECTION 'h=x' PUBLICATION p1, p2")
13523            .unwrap();
13524        e.execute("CREATE SUBSCRIPTION a_sub CONNECTION 'h=y' PUBLICATION p3")
13525            .unwrap();
13526        let r = e.execute_readonly("SHOW SUBSCRIPTIONS").unwrap();
13527        let QueryResult::Rows { rows, columns } = r else {
13528            panic!()
13529        };
13530        assert_eq!(rows.len(), 2);
13531        assert_eq!(columns.len(), 5);
13532        assert_eq!(columns[0].name, "name");
13533        assert_eq!(columns[4].name, "last_received_pos");
13534        // Alphabetical: a_sub, z_sub.
13535        let names: Vec<&str> = rows
13536            .iter()
13537            .map(|r| {
13538                if let Value::Text(s) = &r.values[0] {
13539                    s.as_str()
13540                } else {
13541                    panic!()
13542                }
13543            })
13544            .collect();
13545        assert_eq!(names, alloc::vec!["a_sub", "z_sub"]);
13546        // Row 0: a_sub
13547        assert_eq!(rows[0].values[1], Value::Text("h=y".to_string()));
13548        assert_eq!(rows[0].values[2], Value::Text("p3".to_string()));
13549        assert_eq!(rows[0].values[3], Value::Bool(true));
13550        assert_eq!(rows[0].values[4], Value::BigInt(0));
13551        // Row 1: z_sub — publications join with ", "
13552        assert_eq!(rows[1].values[2], Value::Text("p1, p2".to_string()));
13553    }
13554
13555    #[test]
13556    fn subscriptions_persist_across_snapshot_envelope_v4() {
13557        let mut e = Engine::new();
13558        e.execute("CREATE SUBSCRIPTION s1 CONNECTION 'h=A' PUBLICATION p1, p2")
13559            .unwrap();
13560        e.execute("CREATE SUBSCRIPTION s2 CONNECTION 'h=B' PUBLICATION p3")
13561            .unwrap();
13562        e.subscription_advance("s2", 42);
13563        let snap = e.snapshot();
13564        let e2 = Engine::restore_envelope(&snap).unwrap();
13565        assert_eq!(e2.subscriptions().len(), 2);
13566        let s1 = e2.subscriptions().get("s1").unwrap();
13567        assert_eq!(s1.conn_str, "h=A");
13568        assert_eq!(
13569            s1.publications,
13570            alloc::vec!["p1".to_string(), "p2".to_string()]
13571        );
13572        assert_eq!(s1.last_received_pos, 0);
13573        let s2 = e2.subscriptions().get("s2").unwrap();
13574        assert_eq!(s2.last_received_pos, 42);
13575    }
13576
13577    #[test]
13578    fn v3_envelope_loads_with_empty_subscriptions() {
13579        // v3 snapshot (publications-only). Forge it by hand so we
13580        // verify v6.1.4 readers don't panic — they must surface
13581        // empty subscriptions and a populated publication table.
13582        let mut e = Engine::new();
13583        e.execute("CREATE PUBLICATION pub_legacy").unwrap();
13584        let catalog = e.catalog.serialize();
13585        let users = crate::users::serialize_users(&e.users);
13586        let pubs = e.publications.serialize();
13587        let mut buf = Vec::new();
13588        buf.extend_from_slice(b"SPGENV01");
13589        buf.push(3u8); // v3
13590        buf.extend_from_slice(&u32::try_from(catalog.len()).unwrap().to_le_bytes());
13591        buf.extend_from_slice(&catalog);
13592        buf.extend_from_slice(&u32::try_from(users.len()).unwrap().to_le_bytes());
13593        buf.extend_from_slice(&users);
13594        buf.extend_from_slice(&u32::try_from(pubs.len()).unwrap().to_le_bytes());
13595        buf.extend_from_slice(&pubs);
13596        let crc = spg_crypto::crc32::crc32(&buf);
13597        buf.extend_from_slice(&crc.to_le_bytes());
13598
13599        let e2 = Engine::restore_envelope(&buf).expect("v3 envelope restores under v4 reader");
13600        assert!(e2.subscriptions().is_empty());
13601        assert!(e2.publications().contains("pub_legacy"));
13602    }
13603
13604    #[test]
13605    fn create_subscription_allowed_inside_transaction() {
13606        let mut e = Engine::new();
13607        e.execute("BEGIN").unwrap();
13608        e.execute("CREATE SUBSCRIPTION s CONNECTION 'h=x' PUBLICATION p")
13609            .unwrap();
13610        e.execute("COMMIT").unwrap();
13611        assert!(e.subscriptions().contains("s"));
13612    }
13613
13614    // ── v6.2.0: ANALYZE + spg_statistic + envelope v5 ──────────
13615    #[test]
13616    fn analyze_populates_histogram_bounds() {
13617        let mut e = Engine::new();
13618        e.execute("CREATE TABLE t (id INT NOT NULL, name TEXT)")
13619            .unwrap();
13620        for i in 0..50 {
13621            e.execute(&alloc::format!("INSERT INTO t VALUES ({i}, 'name{i}')"))
13622                .unwrap();
13623        }
13624        e.execute("ANALYZE t").unwrap();
13625        let stats = e.statistics();
13626        let id_stats = stats.get("t", "id").unwrap();
13627        assert!(id_stats.histogram_bounds.len() >= 2);
13628        assert_eq!(id_stats.histogram_bounds.first().unwrap(), "0");
13629        assert_eq!(id_stats.histogram_bounds.last().unwrap(), "49");
13630        assert!((id_stats.null_frac - 0.0).abs() < 1e-6);
13631        assert_eq!(id_stats.n_distinct, 50);
13632    }
13633
13634    #[test]
13635    fn reanalyze_overwrites_prior_stats() {
13636        let mut e = Engine::new();
13637        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
13638        for i in 0..10 {
13639            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
13640                .unwrap();
13641        }
13642        e.execute("ANALYZE t").unwrap();
13643        let n1 = e.statistics().get("t", "id").unwrap().n_distinct;
13644        assert_eq!(n1, 10);
13645        for i in 10..30 {
13646            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
13647                .unwrap();
13648        }
13649        e.execute("ANALYZE t").unwrap();
13650        let n2 = e.statistics().get("t", "id").unwrap().n_distinct;
13651        assert_eq!(n2, 30);
13652    }
13653
13654    #[test]
13655    fn analyze_unknown_table_errors() {
13656        let mut e = Engine::new();
13657        let err = e.execute("ANALYZE nonexistent").unwrap_err();
13658        assert!(matches!(
13659            err,
13660            EngineError::Storage(StorageError::TableNotFound { .. })
13661        ));
13662    }
13663
13664    #[test]
13665    fn bare_analyze_covers_all_user_tables() {
13666        let mut e = Engine::new();
13667        e.execute("CREATE TABLE t1 (id INT NOT NULL)").unwrap();
13668        e.execute("CREATE TABLE t2 (name TEXT NOT NULL)").unwrap();
13669        e.execute("INSERT INTO t1 VALUES (1)").unwrap();
13670        e.execute("INSERT INTO t2 VALUES ('alice')").unwrap();
13671        let r = e.execute("ANALYZE").unwrap();
13672        match r {
13673            QueryResult::CommandOk {
13674                affected,
13675                modified_catalog,
13676            } => {
13677                assert_eq!(affected, 2);
13678                assert!(modified_catalog);
13679            }
13680            other => panic!("expected CommandOk, got {other:?}"),
13681        }
13682        assert!(e.statistics().get("t1", "id").is_some());
13683        assert!(e.statistics().get("t2", "name").is_some());
13684    }
13685
13686    #[test]
13687    fn select_from_spg_statistic_returns_rows_per_column() {
13688        let mut e = Engine::new();
13689        e.execute("CREATE TABLE t (id INT NOT NULL, label TEXT)")
13690            .unwrap();
13691        e.execute("INSERT INTO t VALUES (1, 'a')").unwrap();
13692        e.execute("INSERT INTO t VALUES (2, 'b')").unwrap();
13693        e.execute("ANALYZE t").unwrap();
13694        let r = e.execute_readonly("SELECT * FROM spg_statistic").unwrap();
13695        let QueryResult::Rows { rows, columns } = r else {
13696            panic!()
13697        };
13698        // v6.7.0 — spg_statistic gained a `cold_row_count` column.
13699        assert_eq!(columns.len(), 6);
13700        assert_eq!(columns[0].name, "table_name");
13701        assert_eq!(columns[4].name, "histogram_bounds");
13702        assert_eq!(columns[5].name, "cold_row_count");
13703        assert_eq!(rows.len(), 2, "one row per column of t");
13704        // Sorted by (table_name, column_name).
13705        match (&rows[0].values[0], &rows[0].values[1]) {
13706            (Value::Text(t), Value::Text(c)) => {
13707                assert_eq!(t, "t");
13708                // BTreeMap orders (table, column); columns "id" < "label".
13709                assert_eq!(c, "id");
13710            }
13711            _ => panic!(),
13712        }
13713    }
13714
13715    #[test]
13716    fn analyze_skips_vector_columns() {
13717        // Vector columns have their own stats shape (HNSW graph);
13718        // ANALYZE leaves them out of spg_statistic.
13719        let mut e = Engine::new();
13720        e.execute("CREATE TABLE t (id INT NOT NULL, v VECTOR(3) NOT NULL)")
13721            .unwrap();
13722        e.execute("INSERT INTO t VALUES (1, [1, 2, 3])").unwrap();
13723        e.execute("ANALYZE t").unwrap();
13724        assert!(e.statistics().get("t", "id").is_some());
13725        assert!(e.statistics().get("t", "v").is_none());
13726    }
13727
13728    #[test]
13729    fn statistics_persist_across_envelope_v5_round_trip() {
13730        let mut e = Engine::new();
13731        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
13732        for i in 0..20 {
13733            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
13734                .unwrap();
13735        }
13736        e.execute("ANALYZE").unwrap();
13737        let snap = e.snapshot();
13738        let e2 = Engine::restore_envelope(&snap).unwrap();
13739        let s = e2.statistics().get("t", "id").unwrap();
13740        assert_eq!(s.n_distinct, 20);
13741    }
13742
13743    // ── v6.2.1 auto-analyze threshold ───────────────────────────
13744
13745    #[test]
13746    fn auto_analyze_threshold_fires_after_10pct_of_min_rows_on_small_table() {
13747        // For a table with 0 rows then 10 inserts → modified=10,
13748        // row_count=10. Threshold = 0.1 × max(10, 100) = 10. So
13749        // after the 10th INSERT the threshold is met.
13750        let mut e = Engine::new();
13751        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
13752        for i in 0..9 {
13753            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
13754                .unwrap();
13755        }
13756        assert!(e.tables_needing_analyze().is_empty(), "9 < threshold");
13757        e.execute("INSERT INTO t VALUES (9)").unwrap();
13758        let needs = e.tables_needing_analyze();
13759        assert_eq!(needs, alloc::vec!["t".to_string()]);
13760    }
13761
13762    #[test]
13763    fn auto_analyze_threshold_uses_10pct_of_row_count_for_large_tables() {
13764        // After ANALYZE on 1000 rows, threshold = 0.1 × row_count.
13765        // Each new INSERT bumps both modified and row_count, so to
13766        // trigger from N=1000 we need modifications ≥ 0.1 × (1000+M),
13767        // i.e. M ≥ 112. The test inserts 50 (no fire), then 150
13768        // more (200 total mods, row_count=1200, threshold=120 → fire).
13769        let mut e = Engine::new();
13770        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
13771        for i in 0..1000 {
13772            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
13773                .unwrap();
13774        }
13775        e.execute("ANALYZE t").unwrap();
13776        assert!(e.tables_needing_analyze().is_empty(), "fresh ANALYZE");
13777        for i in 1000..1050 {
13778            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
13779                .unwrap();
13780        }
13781        assert!(
13782            e.tables_needing_analyze().is_empty(),
13783            "50 inserts < threshold of ~105"
13784        );
13785        for i in 1050..1200 {
13786            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
13787                .unwrap();
13788        }
13789        assert_eq!(
13790            e.tables_needing_analyze(),
13791            alloc::vec!["t".to_string()],
13792            "200 inserts > 0.1 × 1200 threshold"
13793        );
13794    }
13795
13796    #[test]
13797    fn auto_analyze_threshold_resets_after_analyze() {
13798        let mut e = Engine::new();
13799        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
13800        for i in 0..200 {
13801            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
13802                .unwrap();
13803        }
13804        assert!(!e.tables_needing_analyze().is_empty());
13805        e.execute("ANALYZE").unwrap();
13806        assert!(
13807            e.tables_needing_analyze().is_empty(),
13808            "ANALYZE must reset the counter"
13809        );
13810    }
13811
13812    #[test]
13813    fn auto_analyze_threshold_tracks_updates_and_deletes() {
13814        let mut e = Engine::new();
13815        e.execute("CREATE TABLE t (id INT NOT NULL, label TEXT)")
13816            .unwrap();
13817        for i in 0..50 {
13818            e.execute(&alloc::format!("INSERT INTO t VALUES ({i}, 'x')"))
13819                .unwrap();
13820        }
13821        e.execute("ANALYZE t").unwrap();
13822        // UPDATE 20 rows + DELETE 5 → modified=25. Threshold = 0.1
13823        // × max(50, 100) = 10. So 25 >= 10 → trigger.
13824        e.execute("UPDATE t SET label = 'y' WHERE id < 20").unwrap();
13825        e.execute("DELETE FROM t WHERE id >= 45").unwrap();
13826        assert_eq!(e.tables_needing_analyze(), alloc::vec!["t".to_string()]);
13827    }
13828
13829    #[test]
13830    fn v4_envelope_loads_with_empty_statistics() {
13831        // Forge a v4 envelope by hand: catalog + users + pubs +
13832        // subs trailer, no statistics. A v6.2.0 reader must accept
13833        // it and surface an empty Statistics.
13834        let mut e = Engine::new();
13835        e.create_user("alice", "secret", crate::users::Role::ReadOnly, [0u8; 16])
13836            .unwrap();
13837        let catalog = e.catalog.serialize();
13838        let users = crate::users::serialize_users(&e.users);
13839        let pubs = e.publications.serialize();
13840        let subs = e.subscriptions.serialize();
13841        let mut buf = Vec::new();
13842        buf.extend_from_slice(b"SPGENV01");
13843        buf.push(4u8);
13844        buf.extend_from_slice(&u32::try_from(catalog.len()).unwrap().to_le_bytes());
13845        buf.extend_from_slice(&catalog);
13846        buf.extend_from_slice(&u32::try_from(users.len()).unwrap().to_le_bytes());
13847        buf.extend_from_slice(&users);
13848        buf.extend_from_slice(&u32::try_from(pubs.len()).unwrap().to_le_bytes());
13849        buf.extend_from_slice(&pubs);
13850        buf.extend_from_slice(&u32::try_from(subs.len()).unwrap().to_le_bytes());
13851        buf.extend_from_slice(&subs);
13852        let crc = spg_crypto::crc32::crc32(&buf);
13853        buf.extend_from_slice(&crc.to_le_bytes());
13854        let e2 = Engine::restore_envelope(&buf).expect("v4 envelope restores");
13855        assert!(e2.statistics().is_empty());
13856    }
13857
13858    #[test]
13859    fn v1_v2_envelope_loads_with_empty_publications() {
13860        // A snapshot taken before v6.1.2 (no publication trailer,
13861        // envelope v2) must still deserialise — and the resulting
13862        // engine must report zero publications. Use the engine's own
13863        // round-trip with no publications: that emits v3 but with an
13864        // empty pubs block. Then forge a v2 envelope by hand to lock
13865        // the back-compat path.
13866        let mut e = Engine::new();
13867        // Force users to be non-empty so the snapshot takes the
13868        // envelope path rather than the bare-catalog fallback.
13869        e.create_user("alice", "secret", crate::users::Role::ReadOnly, [0u8; 16])
13870            .unwrap();
13871
13872        // Forge an envelope v2: same shape as v3 but no pubs trailer.
13873        let catalog = e.catalog.serialize();
13874        let users = crate::users::serialize_users(&e.users);
13875        let mut buf = Vec::new();
13876        buf.extend_from_slice(b"SPGENV01");
13877        buf.push(2u8); // v2
13878        buf.extend_from_slice(&u32::try_from(catalog.len()).unwrap().to_le_bytes());
13879        buf.extend_from_slice(&catalog);
13880        buf.extend_from_slice(&u32::try_from(users.len()).unwrap().to_le_bytes());
13881        buf.extend_from_slice(&users);
13882        let crc = spg_crypto::crc32::crc32(&buf);
13883        buf.extend_from_slice(&crc.to_le_bytes());
13884
13885        let e2 = Engine::restore_envelope(&buf).expect("v2 envelope restores");
13886        assert!(e2.publications().is_empty());
13887    }
13888}