Skip to main content

spg_engine/
lib.rs

1//! SPG execution engine — v0.3 wires the SQL front-end to the in-memory
2//! storage layer. Implements `CREATE TABLE`, single-row `INSERT VALUES`, and
3//! `SELECT * FROM <table>` (no WHERE yet — that lands in v0.4 alongside
4//! expression evaluation against rows).
5#![no_std]
6
7extern crate alloc;
8
9pub mod aggregate;
10pub mod describe;
11pub mod eval;
12pub mod fts;
13pub mod json;
14pub mod memoize;
15pub mod plan_cache;
16pub mod publications;
17pub mod query_stats;
18pub mod reorder;
19pub mod selectivity;
20pub mod statistics;
21pub mod subscriptions;
22pub mod triggers;
23pub mod users;
24
25pub use crate::users::{Role, ScramSecrets, UserError, UserStore};
26
27use alloc::borrow::Cow;
28use alloc::boxed::Box;
29use alloc::collections::BTreeMap;
30use alloc::string::{String, ToString};
31use alloc::vec::Vec;
32use core::fmt;
33
34use spg_sql::ast::{
35    BinOp, ColumnDef, ColumnName, ColumnTypeName, CreateIndexStatement, CreatePublicationStatement,
36    CreateSubscriptionStatement, CreateTableStatement, CreateUserStatement, Expr, FrameBound,
37    FrameKind, FromClause, IndexMethod, InsertStatement, JoinKind, Literal, OrderBy, SelectItem,
38    SelectStatement, Statement, TableRef, UnOp, UnionKind, VecEncoding as SqlVecEncoding,
39    WindowFrame,
40};
41// v7.16.0 — re-export the parsed-statement AST so downstream
42// crates (spg-embedded → spg-sqlx) don't need a direct dep on
43// spg-sql for the prepare/bind handle.
44pub use spg_sql::ast::Statement as ParsedStatement;
45use spg_sql::parser::{self, ParseError};
46use spg_storage::{
47    Catalog, ColumnSchema, CompactReport, DataType, IndexKey, IndexKind, Row, StorageError, Table,
48    TableSchema, Value, VecEncoding,
49};
50
51use crate::eval::{EvalContext, EvalError};
52
/// Result of executing one statement.
///
/// Marked `#[non_exhaustive]`: a `match` outside this crate must include
/// a `_` arm.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub enum QueryResult {
    /// DDL or DML succeeded.
    ///
    /// `affected` is the row count for `INSERT` and 0 elsewhere.
    /// `modified_catalog` tells the server whether this statement
    /// caused the *committed* catalog to change — it's the signal to
    /// snapshot/audit. False for `BEGIN`/`ROLLBACK`, false for writeful
    /// statements executed inside a transaction (those only touch the
    /// shadow), and true for `COMMIT` and for writes outside a TX.
    CommandOk {
        /// Number of rows affected (see the variant docs above).
        affected: usize,
        /// Whether the committed catalog changed (see the variant docs above).
        modified_catalog: bool,
    },
    /// `SELECT` returned a (possibly empty) row set.
    Rows {
        /// Column schemas describing the returned rows.
        columns: Vec<ColumnSchema>,
        /// The returned rows; may be empty.
        rows: Vec<Row>,
    },
}
75
/// All errors the engine can return.
///
/// Marked `#[non_exhaustive]` from v7.5.0 onward: external `match`
/// must include a `_` arm so new variants in subsequent v7.x releases
/// are not breaking changes.
///
/// `ParseError`, `StorageError` and `EvalError` convert into this type
/// through the `From` impls below, so `?` works on all three.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub enum EngineError {
    /// Error reported by the SQL parser (`spg_sql::parser`).
    Parse(ParseError),
    /// Error reported by the storage layer (`spg_storage`).
    Storage(StorageError),
    /// Error reported by expression evaluation (`crate::eval`).
    Eval(EvalError),
    /// Front-end accepted a construct that the v0.x executor doesn't support.
    Unsupported(String),
    /// `BEGIN` while another transaction is already open.
    TransactionAlreadyOpen,
    /// `COMMIT` / `ROLLBACK` with no active transaction.
    NoActiveTransaction,
    /// v4.0 sentinel: `execute_readonly` got a statement that
    /// mutates engine state (INSERT / CREATE / BEGIN / COMMIT / …).
    /// The caller should retake the write lock and dispatch through
    /// `execute(&mut self)` instead.
    WriteRequired,
    /// v4.2: a SELECT would have returned more rows than the
    /// configured `max_query_rows` cap. Carries the cap.
    RowLimitExceeded(usize),
    /// v4.5: cooperative cancellation — the host (server's
    /// per-query watchdog) set the cancel flag while a long-running
    /// SELECT / UPDATE / DELETE was scanning rows. The partial work
    /// is discarded; the caller should surface this as a timeout
    /// to the client.
    Cancelled,
}
108
109impl fmt::Display for EngineError {
110    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
111        match self {
112            Self::Parse(e) => write!(f, "parse: {e}"),
113            Self::Storage(e) => write!(f, "storage: {e}"),
114            Self::Eval(e) => write!(f, "eval: {e}"),
115            Self::Unsupported(s) => write!(f, "unsupported: {s}"),
116            Self::TransactionAlreadyOpen => f.write_str("a transaction is already open"),
117            Self::NoActiveTransaction => f.write_str("no active transaction"),
118            Self::WriteRequired => {
119                f.write_str("statement requires a write lock (use execute, not execute_readonly)")
120            }
121            Self::RowLimitExceeded(n) => {
122                write!(f, "query exceeded max_query_rows={n}")
123            }
124            Self::Cancelled => f.write_str("query cancelled (timeout or client request)"),
125        }
126    }
127}
128
129impl From<ParseError> for EngineError {
130    fn from(e: ParseError) -> Self {
131        Self::Parse(e)
132    }
133}
134impl From<StorageError> for EngineError {
135    fn from(e: StorageError) -> Self {
136        Self::Storage(e)
137    }
138}
139impl From<EvalError> for EngineError {
140    fn from(e: EvalError) -> Self {
141        Self::Eval(e)
142    }
143}
144
// NOTE: the paragraph below describes `Engine` (declared further down in
// this file), not `ClockFn`. It is kept as a plain comment so it no longer
// becomes the opening of the rustdoc for `ClockFn`.
//
// The execution engine. Holds the catalog and (later) other server-scope
// state. `Engine::new()` is intentionally cheap so callers can construct one
// per database, per test.

/// Function pointer that returns "now" as microseconds since Unix
/// epoch. The engine is `no_std`, so it can't reach for `std::time`
/// itself — callers (`spg-server`, the sqllogictest runner) inject a
/// concrete implementation. When no clock is installed (the engine's
/// `Option<ClockFn>` is `None`), `NOW()` / `CURRENT_*` raise
/// `Unsupported`.
pub type ClockFn = fn() -> i64;
154
155/// Function pointer that produces 16 cryptographically random bytes.
156/// Like `ClockFn`, the engine is `no_std` and can't reach for /dev/urandom
157/// itself — host (`spg-server`) injects an OS-backed source. `None`
158/// means SQL-driven `CREATE USER` falls back to a deterministic salt
159/// derived from the username (acceptable in tests; the server always
160/// installs a real RNG so production paths never see this).
161pub type SaltFn = fn() -> [u8; 16];
162
163/// v4.5 cooperative cancellation token. A long-running SELECT /
164/// UPDATE / DELETE checks `is_cancelled` at row-loop checkpoints
165/// and bails with `EngineError::Cancelled`. The host
166/// (`spg-server`) creates an `AtomicBool` per query, spawns a
167/// watchdog thread that sets it after `SPG_QUERY_TIMEOUT_MS`,
168/// and passes it via `execute_with_cancel` / `execute_readonly_with_cancel`.
169///
170/// `CancelToken::none()` is a no-op — used by the legacy `execute`
171/// and `execute_readonly` entry points so existing callers don't
172/// change.
173#[derive(Debug, Clone, Copy)]
174pub struct CancelToken<'a> {
175    flag: Option<&'a core::sync::atomic::AtomicBool>,
176}
177
178impl<'a> CancelToken<'a> {
179    #[must_use]
180    pub const fn none() -> Self {
181        Self { flag: None }
182    }
183
184    #[must_use]
185    pub const fn from_flag(f: &'a core::sync::atomic::AtomicBool) -> Self {
186        Self { flag: Some(f) }
187    }
188
189    #[must_use]
190    pub fn is_cancelled(self) -> bool {
191        self.flag
192            .is_some_and(|f| f.load(core::sync::atomic::Ordering::Relaxed))
193    }
194
195    /// Returns `Err(Cancelled)` if the token has been tripped.
196    /// Used at row-loop checkpoints to bail cooperatively without
197    /// scattering raw `is_cancelled` checks across the executor.
198    #[inline]
199    pub fn check(self) -> Result<(), EngineError> {
200        if self.is_cancelled() {
201            Err(EngineError::Cancelled)
202        } else {
203            Ok(())
204        }
205    }
206}
207
// ---- snapshot envelope (v4.1, extended with CRC32 in v4.37,    ----
// ----   publications in v6.1.2 v3, subscriptions in v6.1.4 v4,  ----
// ----   statistics in v6.2.0 v5)                                ----
210//
211// Wraps a catalog blob + a user blob behind a small header so the
212// server can persist both atomically without inventing a new file.
213// Bare catalog blobs (v3.x) still load via `restore_envelope` since
214// the magic check fails fast and the function falls back to
215// `Catalog::deserialize`.
216//
217// Layout — v1 (v4.1, no CRC):
218//   [8 bytes magic "SPGENV01"]
219//   [u8 version = 1]
220//   [u32 catalog_len][catalog bytes]
221//   [u32 users_len][users bytes]
222//
223// Layout — v2 (v4.37, CRC32 of body):
224//   [8 bytes magic "SPGENV01"]
225//   [u8 version = 2]
226//   [u32 catalog_len][catalog bytes]
227//   [u32 users_len][users bytes]
228//   [u32 crc32]                      ← CRC32 of every byte before it.
229//
230// Layout — v3 (v6.1.2, publications trailer):
231//   [8 bytes magic "SPGENV01"]
232//   [u8 version = 3]
233//   [u32 catalog_len][catalog bytes]
234//   [u32 users_len][users bytes]
235//   [u32 pubs_len][publications bytes]
236//   [u32 crc32]
237//
238// Layout — v4 (v6.1.4, subscriptions trailer):
239//   [8 bytes magic "SPGENV01"]
240//   [u8 version = 4]
241//   [u32 catalog_len][catalog bytes]
242//   [u32 users_len][users bytes]
243//   [u32 pubs_len][publications bytes]
244//   [u32 subs_len][subscriptions bytes]
245//   [u32 crc32]
246//
247// Layout — v5 (v6.2.0, statistics trailer):
248//   [8 bytes magic "SPGENV01"]
249//   [u8 version = 5]
250//   [u32 catalog_len][catalog bytes]
251//   [u32 users_len][users bytes]
252//   [u32 pubs_len][publications bytes]
253//   [u32 subs_len][subscriptions bytes]
254//   [u32 stats_len][statistics bytes]      ← NEW
255//   [u32 crc32]
256//
257// Writers emit v5 from v6.2.0 on. Readers accept all of {v1, v2,
258// v3, v4, v5}: v1/v2 load with empty publications / subscriptions /
259// statistics; v3 loads with empty subscriptions + statistics; v4
260// loads with empty statistics; v5 deserialises all three. Older
261// SPG versions reading a v5 envelope fall through the version
262// match to `EnvelopeParse::Bare` — pre-v6.2.0 binaries cannot
263// open v6.2.0+ snapshots (matches the v6.1.2 / v6.1.4 breaks).
264
265const ENVELOPE_MAGIC: &[u8; 8] = b"SPGENV01";
266const ENVELOPE_VERSION_V1: u8 = 1;
267const ENVELOPE_VERSION_V2: u8 = 2;
268const ENVELOPE_VERSION_V3: u8 = 3;
269const ENVELOPE_VERSION_V4: u8 = 4;
270const ENVELOPE_VERSION_V5: u8 = 5;
271
272fn build_envelope(catalog: &[u8], users: &[u8], pubs: &[u8], subs: &[u8], stats: &[u8]) -> Vec<u8> {
273    let mut out = Vec::with_capacity(
274        8 + 1
275            + 4
276            + catalog.len()
277            + 4
278            + users.len()
279            + 4
280            + pubs.len()
281            + 4
282            + subs.len()
283            + 4
284            + stats.len()
285            + 4,
286    );
287    out.extend_from_slice(ENVELOPE_MAGIC);
288    out.push(ENVELOPE_VERSION_V5);
289    out.extend_from_slice(
290        &u32::try_from(catalog.len())
291            .expect("≤ 4G catalog")
292            .to_le_bytes(),
293    );
294    out.extend_from_slice(catalog);
295    out.extend_from_slice(
296        &u32::try_from(users.len())
297            .expect("≤ 4G users")
298            .to_le_bytes(),
299    );
300    out.extend_from_slice(users);
301    out.extend_from_slice(
302        &u32::try_from(pubs.len())
303            .expect("≤ 4G publications")
304            .to_le_bytes(),
305    );
306    out.extend_from_slice(pubs);
307    out.extend_from_slice(
308        &u32::try_from(subs.len())
309            .expect("≤ 4G subscriptions")
310            .to_le_bytes(),
311    );
312    out.extend_from_slice(subs);
313    out.extend_from_slice(
314        &u32::try_from(stats.len())
315            .expect("≤ 4G statistics")
316            .to_le_bytes(),
317    );
318    out.extend_from_slice(stats);
319    let crc = spg_crypto::crc32::crc32(&out);
320    out.extend_from_slice(&crc.to_le_bytes());
321    out
322}
323
/// Outcome of envelope parsing: either bare-catalog fallback, the
/// successfully split sections of a v1–v5 envelope, or an explicit
/// corruption error from a CRC mismatch (v2 and later carry a CRC).
/// `Bare` (catalog-only fallback) preserves v3.x readability.
///
/// The optional sections are `None` when the envelope version predates
/// them: `publications` is present from v3, `subscriptions` from v4 and
/// `statistics` from v5.
enum EnvelopeParse<'a> {
    /// The buffer is not a well-formed envelope; treat it as a bare catalog.
    Bare,
    /// A well-formed envelope, split into borrowed section slices.
    Pair {
        catalog: &'a [u8],
        users: &'a [u8],
        publications: Option<&'a [u8]>,
        subscriptions: Option<&'a [u8]>,
        statistics: Option<&'a [u8]>,
    },
    /// The trailing CRC32 (`expected`) does not match the CRC32 computed
    /// over the bytes preceding it (`computed`).
    CrcMismatch {
        expected: u32,
        computed: u32,
    },
}
344
345/// Returns `EnvelopeParse::Pair` for a valid v1 / v2 / v3 envelope,
346/// `Bare` for a buffer that doesn't look like an envelope (v3.x
347/// bare catalog fallback), and `CrcMismatch` for a v2/v3 envelope
348/// whose trailing CRC32 doesn't match the body.
349fn split_envelope(buf: &[u8]) -> EnvelopeParse<'_> {
350    if buf.len() < 8 + 1 + 4 || &buf[..8] != ENVELOPE_MAGIC {
351        return EnvelopeParse::Bare;
352    }
353    let version = buf[8];
354    if !matches!(
355        version,
356        ENVELOPE_VERSION_V1
357            | ENVELOPE_VERSION_V2
358            | ENVELOPE_VERSION_V3
359            | ENVELOPE_VERSION_V4
360            | ENVELOPE_VERSION_V5
361    ) {
362        return EnvelopeParse::Bare;
363    }
364    let mut p = 9usize;
365    let Some(cat_len_bytes) = buf.get(p..p + 4) else {
366        return EnvelopeParse::Bare;
367    };
368    let Ok(cat_len_arr) = cat_len_bytes.try_into() else {
369        return EnvelopeParse::Bare;
370    };
371    let cat_len = u32::from_le_bytes(cat_len_arr) as usize;
372    p += 4;
373    if p + cat_len + 4 > buf.len() {
374        return EnvelopeParse::Bare;
375    }
376    let catalog = &buf[p..p + cat_len];
377    p += cat_len;
378    let Some(user_len_bytes) = buf.get(p..p + 4) else {
379        return EnvelopeParse::Bare;
380    };
381    let Ok(user_len_arr) = user_len_bytes.try_into() else {
382        return EnvelopeParse::Bare;
383    };
384    let user_len = u32::from_le_bytes(user_len_arr) as usize;
385    p += 4;
386    if p + user_len > buf.len() {
387        return EnvelopeParse::Bare;
388    }
389    let users = &buf[p..p + user_len];
390    p += user_len;
391    let publications = if matches!(
392        version,
393        ENVELOPE_VERSION_V3 | ENVELOPE_VERSION_V4 | ENVELOPE_VERSION_V5
394    ) {
395        // [u32 pubs_len][publications bytes]
396        let Some(pubs_len_bytes) = buf.get(p..p + 4) else {
397            return EnvelopeParse::Bare;
398        };
399        let Ok(pubs_len_arr) = pubs_len_bytes.try_into() else {
400            return EnvelopeParse::Bare;
401        };
402        let pubs_len = u32::from_le_bytes(pubs_len_arr) as usize;
403        p += 4;
404        if p + pubs_len > buf.len() {
405            return EnvelopeParse::Bare;
406        }
407        let pubs_slice = &buf[p..p + pubs_len];
408        p += pubs_len;
409        Some(pubs_slice)
410    } else {
411        None
412    };
413    let subscriptions = if matches!(version, ENVELOPE_VERSION_V4 | ENVELOPE_VERSION_V5) {
414        // [u32 subs_len][subscriptions bytes]
415        let Some(subs_len_bytes) = buf.get(p..p + 4) else {
416            return EnvelopeParse::Bare;
417        };
418        let Ok(subs_len_arr) = subs_len_bytes.try_into() else {
419            return EnvelopeParse::Bare;
420        };
421        let subs_len = u32::from_le_bytes(subs_len_arr) as usize;
422        p += 4;
423        if p + subs_len > buf.len() {
424            return EnvelopeParse::Bare;
425        }
426        let subs_slice = &buf[p..p + subs_len];
427        p += subs_len;
428        Some(subs_slice)
429    } else {
430        None
431    };
432    let statistics = if version == ENVELOPE_VERSION_V5 {
433        // [u32 stats_len][statistics bytes]
434        let Some(stats_len_bytes) = buf.get(p..p + 4) else {
435            return EnvelopeParse::Bare;
436        };
437        let Ok(stats_len_arr) = stats_len_bytes.try_into() else {
438            return EnvelopeParse::Bare;
439        };
440        let stats_len = u32::from_le_bytes(stats_len_arr) as usize;
441        p += 4;
442        if p + stats_len > buf.len() {
443            return EnvelopeParse::Bare;
444        }
445        let stats_slice = &buf[p..p + stats_len];
446        p += stats_len;
447        Some(stats_slice)
448    } else {
449        None
450    };
451    if matches!(
452        version,
453        ENVELOPE_VERSION_V2 | ENVELOPE_VERSION_V3 | ENVELOPE_VERSION_V4 | ENVELOPE_VERSION_V5
454    ) {
455        if p + 4 != buf.len() {
456            return EnvelopeParse::Bare;
457        }
458        let Ok(crc_arr) = buf[p..p + 4].try_into() else {
459            return EnvelopeParse::Bare;
460        };
461        let expected = u32::from_le_bytes(crc_arr);
462        let computed = spg_crypto::crc32::crc32(&buf[..p]);
463        if expected != computed {
464            return EnvelopeParse::CrcMismatch { expected, computed };
465        }
466    } else if p != buf.len() {
467        // v1: must end exactly at the users section.
468        return EnvelopeParse::Bare;
469    }
470    EnvelopeParse::Pair {
471        catalog,
472        users,
473        publications,
474        subscriptions,
475        statistics,
476    }
477}
478
479/// v4.41.1 opaque transaction handle. Returned by `Engine::alloc_tx_id`,
480/// threaded through `Engine::execute_in` so dispatch can identify which
481/// in-flight TX a statement belongs to. `IMPLICIT_TX` is the reserved
482/// slot every legacy caller — engine self-tests, spg-cli, spg-embedded,
483/// startup replay — implicitly uses through the unchanged
484/// `Engine::execute(sql)` API. v4.41.1 keeps at most one active slot at
485/// runtime (dispatch holds `engine.write()` across the wrap, same as
486/// v4.34); the map shape is here to let v4.42 turn on N in-flight
487/// implicit TXs without reshuffling the engine internals.
488#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
489pub struct TxId(pub u64);
490
491/// Reserved slot used by `Engine::execute(sql)` — the legacy single-
492/// global-shadow path. New `alloc_tx_id` handles start at 1.
493pub const IMPLICIT_TX: TxId = TxId(0);
494
495/// v6.7.3 — default segment-size threshold used by `COMPACT COLD
496/// SEGMENTS` when no explicit target is supplied. Segments whose
497/// `OwnedSegment::bytes().len()` is **strictly** less than this
498/// value are eligible to merge. spg-server reads
499/// `SPG_COMPACTION_TARGET_SEGMENT_BYTES` to override.
500pub const COMPACTION_TARGET_DEFAULT_BYTES: u64 = 4 * 1024 * 1024;
501
502/// Per-slot transaction state. Held inside `tx_catalogs[tx_id]` for the
503/// lifetime of a BEGIN..COMMIT (or BEGIN..ROLLBACK) window. Drops when
504/// the TX commits (its `catalog` is moved over `Engine.catalog`) or
505/// rolls back (slot removed, catalog discarded).
506#[derive(Debug, Default, Clone)]
507struct TxState {
508    /// The TX's shadow copy of the catalog. Started as a clone of
509    /// `Engine.catalog` at BEGIN time; writes flow into it; COMMIT
510    /// installs it over `Engine.catalog`. `Catalog::clone()` is O(1)
511    /// since v4.40 (`PersistentVec` rows + `PersistentBTreeMap` indices).
512    catalog: Catalog,
513    /// Per-TX savepoint stack. Each entry pairs the savepoint name with
514    /// a clone of `catalog` at the moment `SAVEPOINT <name>` fired.
515    /// `ROLLBACK TO <name>` restores from the entry and pops everything
516    /// after it; `RELEASE <name>` discards the entry and everything
517    /// after; COMMIT/ROLLBACK clears the whole stack.
518    savepoints: Vec<(String, Catalog)>,
519}
520
/// v7.11.0 — frozen read-only view of the engine's committed state.
/// Constructed via [`Engine::clone_snapshot`]. Holds clones of the
/// catalog, statistics, clock function, and row-cap config — the
/// four fields the `execute_readonly` path actually reads. Cheap to
/// `Clone` (each clone shares the underlying `PersistentVec` row
/// storage; only the trie root pointers copy). Send + Sync so a
/// snapshot can be moved across `tokio::task::spawn_blocking`
/// boundaries without coordination.
///
/// The contract: a snapshot reflects the engine's state at the
/// moment `clone_snapshot()` returned. Subsequent writes to the
/// engine are NOT visible. Callers who need fresher data take a
/// new snapshot.
#[derive(Debug, Clone)]
pub struct CatalogSnapshot {
    /// Clone of the catalog returned by the engine's `active_catalog()`
    /// at snapshot time.
    catalog: Catalog,
    /// Clone of the engine's `statistics` at snapshot time.
    statistics: statistics::Statistics,
    /// Copy of the engine's optional clock function.
    clock: Option<ClockFn>,
    /// Copy of the engine's per-query row cap (`None` = unlimited).
    max_query_rows: Option<usize>,
}
541
542#[derive(Debug, Default)]
543pub struct Engine {
544    /// Committed catalog — what survives `Engine::snapshot()` and what
545    /// outside-TX `SELECT`s read.
546    catalog: Catalog,
547    /// Active TX slots, keyed by `TxId`. Empty when no TX is in flight.
548    /// v4.41.1 runtime invariant: at most one entry (single-writer
549    /// model unchanged). v4.42 will let dispatch hold multiple entries
550    /// concurrently for group commit + engine MVCC.
551    tx_catalogs: BTreeMap<TxId, TxState>,
552    /// Which slot the next exec_* call should mutate. Set by
553    /// `execute_in(sql, tx_id)` at the entry point; legacy `execute(sql)`
554    /// sets it to `IMPLICIT_TX`. None when no TX is in flight (read /
555    /// write goes straight against `catalog`).
556    current_tx: Option<TxId>,
557    /// Monotonic counter for `alloc_tx_id`. Starts at 1 — slot 0 is
558    /// reserved for `IMPLICIT_TX`.
559    next_tx_id: u64,
560    /// Optional wall clock used to satisfy `NOW()` / `CURRENT_TIMESTAMP`
561    /// / `CURRENT_DATE`. Set by the host environment.
562    clock: Option<ClockFn>,
563    /// v4.1 cryptographic RNG for per-user password salt. Set by the
564    /// host. `None` means SQL-driven `CREATE USER` uses a
565    /// deterministic fallback — see `SaltFn`.
566    salt_fn: Option<SaltFn>,
567    /// v4.2 per-query row cap. `None` = unlimited. When set, a
568    /// SELECT that materialises more than `n` rows returns
569    /// `EngineError::RowLimitExceeded`. Enforced before the result
570    /// is shaped into wire frames so a runaway scan can't blow the
571    /// server's heap.
572    max_query_rows: Option<usize>,
573    /// v4.1 RBAC user table. Empty means "no RBAC configured yet" —
574    /// the server decides what that means at the auth boundary
575    /// (open mode vs legacy single-password mode). User CRUD goes
576    /// through `create_user`/`drop_user`/`verify_user`; persistence
577    /// rides the snapshot envelope alongside the catalog.
578    users: UserStore,
579    /// v6.1.2 logical-replication publication catalog. Empty until
580    /// `CREATE PUBLICATION` runs. Persistence rides the v3 envelope
581    /// trailer (see `build_envelope`).
582    publications: publications::Publications,
583    /// v6.1.4 logical-replication subscription catalog. Empty until
584    /// `CREATE SUBSCRIPTION` runs. Persistence rides the v4 envelope
585    /// trailer.
586    subscriptions: subscriptions::Subscriptions,
587    /// v6.2.0 — per-column statistics for the cost-based optimizer.
588    /// Populated by `ANALYZE`; queried via `spg_statistic` virtual
589    /// table. Persistence rides the v5 envelope trailer.
590    statistics: statistics::Statistics,
591    /// v6.3.0 — engine-level plan cache. Caches the post-`prepare()`
592    /// `Statement` keyed on SQL text. In-memory only — does NOT ride
593    /// the snapshot envelope (rebuilt on demand after restart).
594    plan_cache: plan_cache::PlanCache,
595    /// v6.5.1 — per-distinct-SQL execution stats. In-memory only,
596    /// surfaced via `spg_stat_query` virtual table. Updated by the
597    /// `execute_*` paths after a successful execute.
598    query_stats: query_stats::QueryStats,
599    /// v6.5.2 — connection-state provider callback. spg-server
600    /// registers a function at startup that snapshots its
601    /// per-pgwire-connection registry into `ActivityRow`s; engine
602    /// reads through it on every `SELECT * FROM spg_stat_activity`.
603    /// `None` ⇒ no-data (returns empty rows; matches the no_std
604    /// embedded callers that don't run pgwire).
605    activity_provider: Option<ActivityProvider>,
606    /// v6.5.3 — audit-chain provider + verifier. Same pattern as
607    /// activity_provider: spg-server registers both at startup;
608    /// engine reads through on `SELECT * FROM spg_audit_chain` and
609    /// `SELECT * FROM spg_audit_verify`. `None` ⇒ no-data.
610    audit_chain_provider: Option<AuditChainProvider>,
611    audit_verifier: Option<AuditVerifier>,
612    /// v6.5.6 — slow-query log threshold in microseconds. When set,
613    /// every successful execute whose elapsed exceeds the threshold
614    /// gets fed to the registered slow-query log callback (so
615    /// spg-server can emit a structured log line). Default `None`
616    /// = no slow-query logging.
617    slow_query_threshold_us: Option<u64>,
618    slow_query_logger: Option<SlowQueryLogger>,
619    /// v7.12.1 — session parameters set via `SET <name> = <value>`.
620    /// Only `default_text_search_config` is consumed by the engine
621    /// today (the FTS function dispatcher reads it when
622    /// `to_tsvector(text)` is called without an explicit config).
623    /// All other names are accepted + recorded so PG-dump output
624    /// loads, but have no behavioural effect.
625    session_params: BTreeMap<String, String>,
626    /// v7.12.7 — depth counter for trigger-emitted embedded SQL.
627    /// Each time the engine executes a `DeferredEmbeddedStmt` it
628    /// increments this; the recursive `execute_stmt_with_cancel`
629    /// inside that path checks against [`MAX_TRIGGER_RECURSION`]
630    /// to bound runaway cascades (trigger A's UPDATE on table B
631    /// fires trigger B which UPDATEs table A which fires trigger
632    /// A again…). Reset to 0 once the original DML returns.
633    trigger_recursion_depth: u32,
634    /// v7.14.0 — when `SET FOREIGN_KEY_CHECKS=0` is in effect
635    /// (mysqldump preamble), the FK existence + arity check at
636    /// CREATE TABLE time is deferred. FKs referencing a
637    /// not-yet-existing parent land in `pending_foreign_keys`
638    /// keyed by child table; `SET FOREIGN_KEY_CHECKS=1` drains
639    /// the queue and resolves each FK against the now-complete
640    /// catalog. Empty by default; the queue is drained on every
641    /// `RESET ALL` too.
642    foreign_key_checks: bool,
643    pending_foreign_keys: Vec<(alloc::string::String, spg_sql::ast::ForeignKeyConstraint)>,
644}
645
646/// v7.12.7 — hard cap on nested trigger-emitted embedded SQL
647/// fires. 16 deep is well past anything a normal trigger graph
648/// uses while still preventing infinite-loop wedging.
649const MAX_TRIGGER_RECURSION: u32 = 16;
650
651/// v6.5.6 — callback signature for slow-query log emission. Called
652/// with `(sql, elapsed_us)` once per successful execute that crosses
653/// the threshold.
654pub type SlowQueryLogger = fn(&str, u64);
655
656/// v6.5.4 — synthesise a `CREATE TABLE` statement from catalog
657/// state. Round-trips through `Engine::execute` to recreate the
658/// same schema (sans data + indexes — indexes are emitted as a
659/// separate `CREATE INDEX` chain in `spg_database_ddl`).
660fn render_create_table(name: &str, columns: &[ColumnSchema]) -> String {
661    let mut out = alloc::format!("CREATE TABLE {name} (");
662    for (i, col) in columns.iter().enumerate() {
663        if i > 0 {
664            out.push_str(", ");
665        }
666        out.push_str(&col.name);
667        out.push(' ');
668        out.push_str(&render_data_type(col.ty));
669        if !col.nullable {
670            out.push_str(" NOT NULL");
671        }
672        if col.auto_increment {
673            out.push_str(" AUTO_INCREMENT");
674        }
675    }
676    out.push(')');
677    out
678}
679
680fn render_data_type(ty: DataType) -> String {
681    match ty {
682        DataType::SmallInt => "SMALLINT".into(),
683        DataType::Int => "INT".into(),
684        DataType::BigInt => "BIGINT".into(),
685        DataType::Float => "FLOAT".into(),
686        DataType::Text => "TEXT".into(),
687        DataType::Varchar(n) => alloc::format!("VARCHAR({n})"),
688        DataType::Char(n) => alloc::format!("CHAR({n})"),
689        DataType::Bool => "BOOL".into(),
690        DataType::Vector { dim, encoding } => match encoding {
691            spg_storage::VecEncoding::F32 => alloc::format!("VECTOR({dim})"),
692            spg_storage::VecEncoding::Sq8 => alloc::format!("VECTOR({dim}) USING SQ8"),
693            spg_storage::VecEncoding::F16 => alloc::format!("VECTOR({dim}) USING HALF"),
694        },
695        DataType::Numeric { precision, scale } => {
696            alloc::format!("NUMERIC({precision},{scale})")
697        }
698        DataType::Date => "DATE".into(),
699        DataType::Timestamp => "TIMESTAMP".into(),
700        DataType::Interval => "INTERVAL".into(),
701        DataType::Json => "JSON".into(),
702        DataType::Jsonb => "JSONB".into(),
703        DataType::Timestamptz => "TIMESTAMPTZ".into(),
704        DataType::Bytes => "BYTEA".into(),
705        DataType::TextArray => "TEXT[]".into(),
706        DataType::IntArray => "INT[]".into(),
707        DataType::BigIntArray => "BIGINT[]".into(),
708        DataType::TsVector => "TSVECTOR".into(),
709        DataType::TsQuery => "TSQUERY".into(),
710    }
711}
712
713/// v6.5.2 — one row of `spg_stat_activity`. Engine-public so
714/// spg-server can construct rows without re-exporting internal
715/// dispatch types.
716#[derive(Debug, Clone)]
717pub struct ActivityRow {
718    pub pid: u32,
719    pub user: String,
720    pub started_at_us: i64,
721    pub current_sql: String,
722    pub wait_event: String,
723    pub elapsed_us: i64,
724    pub in_transaction: bool,
725}
726
727/// v6.5.2 — provider callback type. Fresh snapshot returned each
728/// call; engine doesn't cache the slice.
729pub type ActivityProvider = fn() -> Vec<ActivityRow>;
730
731/// v6.5.3 — one row of `spg_audit_chain`. Engine-public so
732/// spg-server can construct rows directly from `AuditEntry`.
733#[derive(Debug, Clone)]
734pub struct AuditRow {
735    pub seq: i64,
736    pub ts_ms: i64,
737    pub prev_hash_hex: String,
738    pub entry_hash_hex: String,
739    pub sql: String,
740}
741
/// v6.5.3 — chain-table provider. spg-server registers a fn pointer
/// that snapshots the audit log as `AuditRow`s.
pub type AuditChainProvider = fn() -> Vec<AuditRow>;
/// v6.5.3 — chain verifier. spg-server registers a fn pointer that
/// verifies the audit log and returns `(verified_count, broken_at_seq)`
/// — `broken_at_seq` is `-1` on a clean chain.
pub type AuditVerifier = fn() -> (i64, i64);
748
749impl Engine {
    /// Creates an empty engine: fresh catalog, no open transaction, no
    /// host callbacks installed (clock, salt RNG, activity / audit
    /// providers, slow-query logger), no row cap, and empty user,
    /// publication, subscription, statistics, plan-cache and query-stats
    /// stores.
    ///
    /// Two fields deliberately start at non-zero values: `next_tx_id` is 1
    /// (slot 0 is reserved for `IMPLICIT_TX`) and `foreign_key_checks` is
    /// `true`.
    pub fn new() -> Self {
        Self {
            catalog: Catalog::new(),
            tx_catalogs: BTreeMap::new(),
            current_tx: None,
            next_tx_id: 1,
            clock: None,
            salt_fn: None,
            max_query_rows: None,
            users: UserStore::new(),
            publications: publications::Publications::new(),
            subscriptions: subscriptions::Subscriptions::new(),
            statistics: statistics::Statistics::new(),
            plan_cache: plan_cache::PlanCache::new(),
            query_stats: query_stats::QueryStats::new(),
            activity_provider: None,
            audit_chain_provider: None,
            audit_verifier: None,
            slow_query_threshold_us: None,
            slow_query_logger: None,
            session_params: BTreeMap::new(),
            trigger_recursion_depth: 0,
            foreign_key_checks: true,
            pending_foreign_keys: Vec::new(),
        }
    }
776
777    /// v7.11.0 — clone the engine's committed catalog + read-time
778    /// state into a frozen `CatalogSnapshot`. Cheap (`Catalog` is
779    /// backed by `PersistentVec`; cloning is O(log n) per table).
780    /// Subsequent writes to this engine are invisible to the
781    /// snapshot; the snapshot is self-contained and can be moved
782    /// to another thread for concurrent `execute_readonly_on_snapshot`
783    /// calls. The basis for [`AsyncReadHandle`] in spg-embedded-tokio
784    /// and any other read-fanout pattern.
785    #[must_use]
786    pub fn clone_snapshot(&self) -> CatalogSnapshot {
787        CatalogSnapshot {
788            catalog: self.active_catalog().clone(),
789            statistics: self.statistics.clone(),
790            clock: self.clock,
791            max_query_rows: self.max_query_rows,
792        }
793    }
794
795    /// v7.11.1 — execute a read-only SQL statement against a
796    /// `CatalogSnapshot` without touching this engine. Same
797    /// semantics as `execute_readonly` but parameterised on the
798    /// snapshot's catalog. Reject DDL/DML the same way
799    /// `execute_readonly` does. Static-on-Self so the caller can
800    /// dispatch without holding an `Engine` borrow alongside the
801    /// snapshot.
802    pub fn execute_readonly_on_snapshot(
803        snapshot: &CatalogSnapshot,
804        sql: &str,
805    ) -> Result<QueryResult, EngineError> {
806        Self::execute_readonly_on_snapshot_with_cancel(snapshot, sql, CancelToken::none())
807    }
808
809    /// v7.11.1 — `execute_readonly_on_snapshot` with cooperative
810    /// cancellation. Builds a transient `Engine` over the snapshot
811    /// state, runs `execute_readonly_with_cancel`, drops. The
812    /// transient engine is cheap to construct (no I/O; everything
813    /// is just struct moves) and lets the existing read path stay
814    /// untouched.
815    pub fn execute_readonly_on_snapshot_with_cancel(
816        snapshot: &CatalogSnapshot,
817        sql: &str,
818        cancel: CancelToken<'_>,
819    ) -> Result<QueryResult, EngineError> {
820        let transient = Engine {
821            catalog: snapshot.catalog.clone(),
822            statistics: snapshot.statistics.clone(),
823            clock: snapshot.clock,
824            max_query_rows: snapshot.max_query_rows,
825            ..Engine::default()
826        };
827        transient.execute_readonly_with_cancel(sql, cancel)
828    }
829
830    /// Construct an engine restored from a previously-snapshotted catalog
831    /// (see `snapshot()`).
832    pub fn restore(catalog: Catalog) -> Self {
833        Self {
834            catalog,
835            tx_catalogs: BTreeMap::new(),
836            current_tx: None,
837            next_tx_id: 1,
838            clock: None,
839            salt_fn: None,
840            max_query_rows: None,
841            users: UserStore::new(),
842            publications: publications::Publications::new(),
843            subscriptions: subscriptions::Subscriptions::new(),
844            statistics: statistics::Statistics::new(),
845            plan_cache: plan_cache::PlanCache::new(),
846            query_stats: query_stats::QueryStats::new(),
847            activity_provider: None,
848            audit_chain_provider: None,
849            audit_verifier: None,
850            slow_query_threshold_us: None,
851            slow_query_logger: None,
852            session_params: BTreeMap::new(),
853            trigger_recursion_depth: 0,
854            foreign_key_checks: true,
855            pending_foreign_keys: Vec::new(),
856        }
857    }
858
859    /// Restore an engine + user table from a v4.1 envelope produced
860    /// by `snapshot_with_users()`. Falls back to plain catalog-only
861    /// restore if the envelope magic isn't present (so v3.x snapshot
862    /// files still load). v6.1.2 adds the optional publications
863    /// trailer (envelope v3); a v1/v2 envelope deserialises to an
864    /// empty publication table.
865    pub fn restore_envelope(buf: &[u8]) -> Result<Self, EngineError> {
866        match split_envelope(buf) {
867            EnvelopeParse::Pair {
868                catalog: catalog_bytes,
869                users: user_bytes,
870                publications: pub_bytes,
871                subscriptions: sub_bytes,
872                statistics: stats_bytes,
873            } => {
874                let catalog = Catalog::deserialize(catalog_bytes).map_err(EngineError::Storage)?;
875                let users = users::deserialize_users(user_bytes)
876                    .map_err(|e| EngineError::Unsupported(alloc::format!("users restore: {e}")))?;
877                let publications = match pub_bytes {
878                    Some(b) => publications::Publications::deserialize(b).map_err(|e| {
879                        EngineError::Unsupported(alloc::format!("publications restore: {e:?}"))
880                    })?,
881                    None => publications::Publications::new(),
882                };
883                let subscriptions = match sub_bytes {
884                    Some(b) => subscriptions::Subscriptions::deserialize(b).map_err(|e| {
885                        EngineError::Unsupported(alloc::format!("subscriptions restore: {e:?}"))
886                    })?,
887                    None => subscriptions::Subscriptions::new(),
888                };
889                let statistics = match stats_bytes {
890                    Some(b) => statistics::Statistics::deserialize(b).map_err(|e| {
891                        EngineError::Unsupported(alloc::format!("statistics restore: {e:?}"))
892                    })?,
893                    None => statistics::Statistics::new(),
894                };
895                Ok(Self {
896                    catalog,
897                    tx_catalogs: BTreeMap::new(),
898                    current_tx: None,
899                    next_tx_id: 1,
900                    clock: None,
901                    salt_fn: None,
902                    max_query_rows: None,
903                    users,
904                    publications,
905                    subscriptions,
906                    statistics,
907                    plan_cache: plan_cache::PlanCache::new(),
908                    query_stats: query_stats::QueryStats::new(),
909                    activity_provider: None,
910                    audit_chain_provider: None,
911                    audit_verifier: None,
912                    slow_query_threshold_us: None,
913                    slow_query_logger: None,
914                    session_params: BTreeMap::new(),
915                    trigger_recursion_depth: 0,
916            foreign_key_checks: true,
917            pending_foreign_keys: Vec::new(),
918                })
919            }
920            EnvelopeParse::CrcMismatch { expected, computed } => {
921                Err(EngineError::Storage(StorageError::Corrupt(alloc::format!(
922                    "snapshot envelope CRC32 mismatch (expected={expected:#010x}, computed={computed:#010x})"
923                ))))
924            }
925            EnvelopeParse::Bare => {
926                let catalog = Catalog::deserialize(buf).map_err(EngineError::Storage)?;
927                Ok(Self::restore(catalog))
928            }
929        }
930    }
931
932    pub const fn users(&self) -> &UserStore {
933        &self.users
934    }
935
936    /// `salt` is supplied by the caller (the host has a random
937    /// source; the engine is `no_std`). Caller should pass a fresh
938    /// 16-byte random value per user.
939    pub fn create_user(
940        &mut self,
941        name: &str,
942        password: &str,
943        role: Role,
944        salt: [u8; 16],
945    ) -> Result<(), UserError> {
946        self.users.create(name, password, role, salt)?;
947        // v4.8: also derive SCRAM-SHA-256 secrets so PG-wire SASL
948        // auth can verify without re-running PBKDF2 per attempt.
949        // Uses a fresh salt from the host RNG (falls back to a
950        // deterministic per-username salt when no RNG is wired, same
951        // as the legacy hash path).
952        let scram_salt = self.salt_fn.map_or_else(
953            || {
954                let mut s = [0u8; users::SCRAM_SALT_LEN];
955                let digest = spg_crypto::hash(name.as_bytes());
956                // Use bytes 16..32 of BLAKE3 so we don't reuse the
957                // exact same fallback salt as the BLAKE3 hash path.
958                s.copy_from_slice(&digest[16..32]);
959                s
960            },
961            |f| f(),
962        );
963        self.users
964            .enable_scram(name, password, scram_salt, users::SCRAM_DEFAULT_ITERS)?;
965        Ok(())
966    }
967
968    pub fn drop_user(&mut self, name: &str) -> Result<(), UserError> {
969        self.users.drop(name)
970    }
971
972    pub fn verify_user(&self, name: &str, password: &str) -> Option<Role> {
973        self.users.verify(name, password)
974    }
975
976    /// Builder: attach a wall clock so `NOW()` / `CURRENT_TIMESTAMP` /
977    /// `CURRENT_DATE` evaluate to a real value instead of erroring out.
978    #[must_use]
979    pub const fn with_clock(mut self, clock: ClockFn) -> Self {
980        self.clock = Some(clock);
981        self
982    }
983
984    /// Builder: attach an OS-backed RNG for per-user password salts.
985    /// The host (`spg-server`) typically wires this to `/dev/urandom`.
986    #[must_use]
987    pub const fn with_salt_fn(mut self, f: SaltFn) -> Self {
988        self.salt_fn = Some(f);
989        self
990    }
991
992    /// Builder: cap the number of rows a single SELECT may return.
993    /// Exceeding the cap raises `EngineError::RowLimitExceeded` —
994    /// the bound is checked inside the executor so a runaway
995    /// catalog scan can't allocate millions of rows before the
996    /// server gets a chance to reject the result.
997    #[must_use]
998    pub const fn with_max_query_rows(mut self, n: usize) -> Self {
999        self.max_query_rows = Some(n);
1000        self
1001    }
1002
1003    /// The *committed* catalog. Note: during a transaction this returns the
1004    /// pre-TX state — `SELECT` inside a TX goes through `execute()` and reads
1005    /// the shadow. Tests that inspect outside-TX state should use this.
1006    pub const fn catalog(&self) -> &Catalog {
1007        &self.catalog
1008    }
1009
1010    /// Serialize the *committed* catalog to bytes. v0.6 was full-snapshot; v0.9
1011    /// adds the rule that an open TX's shadow is never snapshotted — only the
1012    /// post-COMMIT state is persisted. v4.1 wraps the catalog in an envelope
1013    /// when there are users to persist; an empty user table snapshots as the
1014    /// bare catalog format (backwards-compat with v3.x readers). v6.1.2
1015    /// adds publications to the envelope condition: either non-empty
1016    /// users OR non-empty publications now triggers the envelope path.
1017    pub fn snapshot(&self) -> Vec<u8> {
1018        if self.users.is_empty()
1019            && self.publications.is_empty()
1020            && self.subscriptions.is_empty()
1021            && self.statistics.is_empty()
1022        {
1023            self.catalog.serialize()
1024        } else {
1025            build_envelope(
1026                &self.catalog.serialize(),
1027                &users::serialize_users(&self.users),
1028                &self.publications.serialize(),
1029                &self.subscriptions.serialize(),
1030                &self.statistics.serialize(),
1031            )
1032        }
1033    }
1034
1035    /// True when at least one TX slot is in flight. v4.41.1 runtime
1036    /// invariant: at most one slot active at a time (dispatch holds
1037    /// `engine.write()` across the entire wrap). v4.42 will let this
1038    /// return true with multiple slots concurrently.
1039    pub fn in_transaction(&self) -> bool {
1040        !self.tx_catalogs.is_empty()
1041    }
1042
1043    /// v4.41.1 allocate a fresh TX handle. Used by spg-server dispatch
1044    /// to scope each implicit-wrap BEGIN..stmt..COMMIT to its own slot
1045    /// in `tx_catalogs`. v4.42 — the commit-barrier leader allocates
1046    /// one of these per task in its group, runs `BEGIN`+sql+`COMMIT`
1047    /// sequentially under a single `engine.write()` so each task's
1048    /// mutations accumulate into shared state, then either keeps the
1049    /// accumulated state (fsync OK) or restores the pre-image via
1050    /// `replace_catalog` (fsync err).
1051    pub fn alloc_tx_id(&mut self) -> TxId {
1052        let id = TxId(self.next_tx_id);
1053        self.next_tx_id = self.next_tx_id.saturating_add(1);
1054        id
1055    }
1056
1057    /// v4.42 — atomically replace the live catalog. Used by the
1058    /// commit-barrier leader to roll back a group whose batched
1059    /// fsync failed: the leader snapshots `engine.catalog().clone()`
1060    /// (O(1) Arc bump after the v4.39/v4.40 persistent migration)
1061    /// at group start, sequentially applies each task's BEGIN+sql+
1062    /// COMMIT under the same write lock to accumulate mutations
1063    /// into shared state, batches the WAL bytes, fsyncs once, and
1064    /// on failure calls this with the pre-image to undo every
1065    /// task in the group at once.
1066    ///
1067    /// **Does NOT touch `tx_catalogs` / `current_tx`.** Any
1068    /// explicit-TX slot from a concurrent client (created via the
1069    /// legacy `IMPLICIT_TX`-less dispatch path or via the future
1070    /// MVCC-readers v5+ work) has its own snapshot baked into the
1071    /// slot — restoring `self.catalog` to the pre-image leaves
1072    /// those slots untouched, exactly as they were when the leader
1073    /// took the lock. The leader's own implicit-TX slots are all
1074    /// already discarded (`exec_commit` removed them as each
1075    /// task's COMMIT ran) by the time this is reached.
1076    pub fn replace_catalog(&mut self, catalog: Catalog) {
1077        self.catalog = catalog;
1078    }
1079
1080    /// v6.7.0 — public shim around `Catalog::freeze_oldest_to_cold`
1081    /// so tests + the spg-server freezer can drive a freeze without
1082    /// reaching into the private `active_catalog_mut`. v6.7.4
1083    /// parallel freezer will build on this surface.
1084    ///
1085    /// Marks the table's cached `cold_row_count` stale because the
1086    /// freeze added cold locators that ANALYZE hasn't yet refreshed.
1087    pub fn freeze_oldest_to_cold(
1088        &mut self,
1089        table_name: &str,
1090        index_name: &str,
1091        max_rows: usize,
1092    ) -> Result<spg_storage::FreezeReport, EngineError> {
1093        let report = self
1094            .active_catalog_mut()
1095            .freeze_oldest_to_cold(table_name, index_name, max_rows)
1096            .map_err(EngineError::Storage)?;
1097        if let Some(t) = self.active_catalog_mut().get_mut(table_name) {
1098            t.mark_cold_row_count_stale();
1099        }
1100        Ok(report)
1101    }
1102
1103    /// v6.7.5 — public shim used by the spg-server follower's
1104    /// segment-forwarding receiver. Registers a cold-tier segment
1105    /// at a specific id (the master's id, as transmitted on the
1106    /// wire) so the follower's BTree-Cold locators stay byte-
1107    /// identical with the master's. Wraps
1108    /// `Catalog::load_segment_bytes_at` under the standard
1109    /// clone-mutate-replace pattern.
1110    ///
1111    /// Returns `Ok(())` on success **and** on the "slot already
1112    /// occupied" case — a follower mid-reconnect may receive a
1113    /// segment chunk for a segment_id it already has on disk
1114    /// (forwarded last session); the caller should treat that
1115    /// path as a no-op rather than a fatal error.
1116    pub fn receive_cold_segment(
1117        &mut self,
1118        segment_id: u32,
1119        bytes: Vec<u8>,
1120    ) -> Result<(), EngineError> {
1121        let mut new_cat = self.catalog.clone();
1122        match new_cat.load_segment_bytes_at(segment_id, bytes) {
1123            Ok(()) => {
1124                self.replace_catalog(new_cat);
1125                Ok(())
1126            }
1127            Err(StorageError::Corrupt(msg)) if msg.contains("already occupied") => Ok(()),
1128            Err(e) => Err(EngineError::Storage(e)),
1129        }
1130    }
1131
1132    /// v6.7.3 — public shim around `Catalog::compact_cold_segments`
1133    /// driving every BTree index on every user table. Returns one
1134    /// `(table, index, report)` triple for each merge that
1135    /// actually happened (no-op (table, index) pairs are filtered
1136    /// out so callers can size persist-side work to the live
1137    /// merges). Caller is responsible for persisting each
1138    /// `report.merged_segment_bytes` and updating the on-disk
1139    /// segment registry; engine layer is no_std and never
1140    /// touches disk.
1141    ///
1142    /// Marks every touched table's cached `cold_row_count` stale
1143    /// — compaction GC'd some shadowed rows, so the count must be
1144    /// re-derived on the next ANALYZE.
1145    pub fn compact_cold_segments_with_target(
1146        &mut self,
1147        target_segment_bytes: u64,
1148    ) -> Result<Vec<(String, String, CompactReport)>, EngineError> {
1149        let table_names = self.active_catalog().table_names();
1150        let mut reports: Vec<(String, String, CompactReport)> = Vec::new();
1151        for tname in table_names {
1152            if is_internal_table_name(&tname) {
1153                continue;
1154            }
1155            let idx_names: Vec<String> = {
1156                let Some(t) = self.active_catalog().get(&tname) else {
1157                    continue;
1158                };
1159                t.indices()
1160                    .iter()
1161                    .filter(|i| matches!(i.kind, IndexKind::BTree(_)))
1162                    .map(|i| i.name.clone())
1163                    .collect()
1164            };
1165            for iname in idx_names {
1166                let report = self
1167                    .active_catalog_mut()
1168                    .compact_cold_segments(&tname, &iname, target_segment_bytes)
1169                    .map_err(EngineError::Storage)?;
1170                if report.merged_segment_id.is_some() {
1171                    if let Some(t) = self.active_catalog_mut().get_mut(&tname) {
1172                        t.mark_cold_row_count_stale();
1173                    }
1174                    reports.push((tname.clone(), iname, report));
1175                }
1176            }
1177        }
1178        Ok(reports)
1179    }
1180
1181    fn active_catalog(&self) -> &Catalog {
1182        match self.current_tx {
1183            Some(t) => self
1184                .tx_catalogs
1185                .get(&t)
1186                .map_or(&self.catalog, |s| &s.catalog),
1187            None => &self.catalog,
1188        }
1189    }
1190
1191    /// v7.12.4 — snapshot every row-level trigger on `table` that
1192    /// fires for `event` (`"INSERT"` / `"UPDATE"` / `"DELETE"`) at
1193    /// the given `timing` (`"BEFORE"` / `"AFTER"`), and clone its
1194    /// referenced function definition. Returned as a vec of owned
1195    /// `FunctionDef` so the row-write loop can fire them without
1196    /// holding a borrow on the catalog (which would conflict with
1197    /// the table.insert / update_row / delete mutable borrows).
1198    fn snapshot_row_triggers(
1199        &self,
1200        table: &str,
1201        event: &str,
1202        timing: &str,
1203    ) -> Vec<spg_storage::FunctionDef> {
1204        let cat = self.active_catalog();
1205        cat.triggers()
1206            .iter()
1207            .filter(|t| {
1208                t.table == table
1209                    && t.timing.eq_ignore_ascii_case(timing)
1210                    && t.for_each.eq_ignore_ascii_case("row")
1211                    && t.events.iter().any(|e| e.eq_ignore_ascii_case(event))
1212            })
1213            .filter_map(|t| cat.functions().get(&t.function).cloned())
1214            .collect()
1215    }
1216
1217    /// v7.13.0 — UPDATE-side snapshot that pairs each trigger's
1218    /// function with its `UPDATE OF cols` filter (mailrs round-5
1219    /// G7). Empty filter Vec means "fire unconditionally", matching
1220    /// the v7.12 behaviour.
1221    fn snapshot_update_row_triggers(
1222        &self,
1223        table: &str,
1224        timing: &str,
1225    ) -> Vec<(spg_storage::FunctionDef, Vec<String>)> {
1226        let cat = self.active_catalog();
1227        cat.triggers()
1228            .iter()
1229            .filter(|t| {
1230                t.table == table
1231                    && t.timing.eq_ignore_ascii_case(timing)
1232                    && t.for_each.eq_ignore_ascii_case("row")
1233                    && t.events.iter().any(|e| e.eq_ignore_ascii_case("UPDATE"))
1234            })
1235            .filter_map(|t| {
1236                cat.functions()
1237                    .get(&t.function)
1238                    .cloned()
1239                    .map(|fd| (fd, t.update_columns.clone()))
1240            })
1241            .collect()
1242    }
1243
1244    /// v7.12.7 — drain the trigger-emitted embedded SQL queue.
1245    /// Called by the INSERT / UPDATE / DELETE executors after
1246    /// their main row-write loop returns. Each statement runs
1247    /// inside the same cancel scope as the firing DML and bumps
1248    /// the recursion counter; nested embedded SQL beyond
1249    /// [`MAX_TRIGGER_RECURSION`] errors with a clear message so
1250    /// a trigger-graph cycle surfaces as a query failure instead
1251    /// of stack-blowing the engine.
1252    fn execute_deferred_trigger_stmts(
1253        &mut self,
1254        deferred: Vec<triggers::DeferredEmbeddedStmt>,
1255        cancel: CancelToken<'_>,
1256    ) -> Result<(), EngineError> {
1257        for d in deferred {
1258            if self.trigger_recursion_depth >= MAX_TRIGGER_RECURSION {
1259                return Err(EngineError::Storage(StorageError::Corrupt(alloc::format!(
1260                    "trigger embedded SQL recursion depth {} exceeded (trigger function \
1261                     {:?} would push past the {} cap — check for trigger cycles)",
1262                    self.trigger_recursion_depth,
1263                    d.function,
1264                    MAX_TRIGGER_RECURSION,
1265                ))));
1266            }
1267            self.trigger_recursion_depth += 1;
1268            let res = self.execute_stmt_with_cancel(d.stmt, cancel);
1269            self.trigger_recursion_depth -= 1;
1270            res?;
1271        }
1272        Ok(())
1273    }
1274
1275    fn active_catalog_mut(&mut self) -> &mut Catalog {
1276        let tx = self.current_tx;
1277        match tx {
1278            Some(t) => match self.tx_catalogs.get_mut(&t) {
1279                Some(s) => &mut s.catalog,
1280                None => &mut self.catalog,
1281            },
1282            None => &mut self.catalog,
1283        }
1284    }
1285
1286    /// Read-only execute path. Succeeds for `SELECT` / `SHOW TABLES`
1287    /// / `SHOW COLUMNS`; returns `EngineError::WriteRequired` for
1288    /// every other statement, so the caller can fall through to the
1289    /// `&mut self` `execute` path under a write lock. Engine state is
1290    /// not mutated even on the success path (`rewrite_clock_calls`
1291    /// and `resolve_order_by_position` both mutate the locally-owned
1292    /// AST, not `self`).
1293    ///
1294    /// **v4.0 concurrency**: this is the entry point the server takes
1295    /// under an `RwLock::read()` so multiple `SELECT` clients run in
1296    /// parallel without serialising on a single mutex.
1297    pub fn execute_readonly(&self, sql: &str) -> Result<QueryResult, EngineError> {
1298        self.execute_readonly_with_cancel(sql, CancelToken::none())
1299    }
1300
1301    /// v4.5 — read path with cooperative cancellation. Token's
1302    /// `is_cancelled` is checked at the start (so a watchdog that
1303    /// already fired returns Cancelled immediately) and at row-loop
1304    /// checkpoints inside `exec_select`. SHOW paths are O(small) and
1305    /// don't bother checking.
1306    pub fn execute_readonly_with_cancel(
1307        &self,
1308        sql: &str,
1309        cancel: CancelToken<'_>,
1310    ) -> Result<QueryResult, EngineError> {
1311        cancel.check()?;
1312        let mut stmt = parser::parse_statement(sql)?;
1313        let now_micros = self.clock.map(|f| f());
1314        rewrite_clock_calls(&mut stmt, now_micros);
1315        if let Statement::Select(s) = &mut stmt {
1316            resolve_order_by_position(s);
1317            // v6.2.3 — cost-based JOIN reorder (read path).
1318            reorder::reorder_joins(s, &self.catalog, &self.statistics);
1319        }
1320        let result = match stmt {
1321            Statement::Select(s) => self.exec_select_cancel(&s, cancel),
1322            Statement::ShowTables => Ok(self.exec_show_tables()),
1323            Statement::ShowColumns(table) => self.exec_show_columns(&table),
1324            Statement::ShowUsers => Ok(self.exec_show_users()),
1325            Statement::ShowPublications => Ok(self.exec_show_publications()),
1326            Statement::ShowSubscriptions => Ok(self.exec_show_subscriptions()),
1327            Statement::WaitForWalPosition { .. } => Err(EngineError::Unsupported(
1328                "WAIT FOR WAL POSITION must be handled by the server layer".into(),
1329            )),
1330            Statement::Explain(e) => self.exec_explain(&e, cancel),
1331            _ => Err(EngineError::WriteRequired),
1332        };
1333        self.enforce_row_limit(result)
1334    }
1335
1336    /// v4.2: cap result-set size. Applied after the executor
1337    /// materialises rows but before they leave the engine — wrapping
1338    /// every Rows-returning exec_* function would scatter the check.
1339    fn enforce_row_limit(
1340        &self,
1341        result: Result<QueryResult, EngineError>,
1342    ) -> Result<QueryResult, EngineError> {
1343        if let (Ok(QueryResult::Rows { rows, .. }), Some(cap)) = (&result, self.max_query_rows)
1344            && rows.len() > cap
1345        {
1346            return Err(EngineError::RowLimitExceeded(cap));
1347        }
1348        result
1349    }
1350
1351    pub fn execute(&mut self, sql: &str) -> Result<QueryResult, EngineError> {
1352        self.execute_in_with_cancel(sql, IMPLICIT_TX, CancelToken::none())
1353    }
1354
1355    /// v4.5 — write path with cooperative cancellation. Same dispatch
1356    /// as `execute_in_with_cancel(sql, IMPLICIT_TX, cancel)`. Kept as
1357    /// a separate entry point for backward-compat with the v4.5
1358    /// public API.
1359    pub fn execute_with_cancel(
1360        &mut self,
1361        sql: &str,
1362        cancel: CancelToken<'_>,
1363    ) -> Result<QueryResult, EngineError> {
1364        self.execute_in_with_cancel(sql, IMPLICIT_TX, cancel)
1365    }
1366
1367    /// v4.41.1 multi-slot write entry. Routes `sql` through the TX
1368    /// slot identified by `tx_id` so spg-server dispatch can scope
1369    /// each implicit-wrap BEGIN..stmt..COMMIT to its own slot in
1370    /// `tx_catalogs`. `IMPLICIT_TX` is the legacy single-slot path
1371    /// every other caller (engine self-tests, replay, spg-embedded)
1372    /// implicitly takes via `execute()` / `execute_with_cancel()`.
1373    pub fn execute_in(&mut self, sql: &str, tx_id: TxId) -> Result<QueryResult, EngineError> {
1374        self.execute_in_with_cancel(sql, tx_id, CancelToken::none())
1375    }
1376
1377    /// v4.41.1 write path with cooperative cancellation + explicit TX
1378    /// scope. Sets `self.current_tx` for the duration of the call so
1379    /// every `exec_*` helper transparently sees its TX's shadow
1380    /// catalog and savepoint stack; restores on exit so the field is
1381    /// only valid mid-call (no leakage across calls).
1382    pub fn execute_in_with_cancel(
1383        &mut self,
1384        sql: &str,
1385        tx_id: TxId,
1386        cancel: CancelToken<'_>,
1387    ) -> Result<QueryResult, EngineError> {
1388        let saved = self.current_tx;
1389        self.current_tx = Some(tx_id);
1390        let result = self.execute_inner_with_cancel(sql, cancel);
1391        self.current_tx = saved;
1392        result
1393    }
1394
1395    /// v6.1.1 — parse and pre-process a SQL string ONCE so the
1396    /// resulting [`Statement`] can be cached and re-executed via
1397    /// [`Engine::execute_prepared`]. Returns the same `Statement`
1398    /// the simple-query path would synthesise internally (clock
1399    /// rewrites + ORDER BY position-ref resolution applied at
1400    /// prepare time, since both are session-independent). The
1401    /// `$N` placeholders in the SQL stay as `Expr::Placeholder(n)`
1402    /// nodes; they're resolved to concrete values per-call by
1403    /// `execute_prepared`'s substitution walk.
1404    ///
1405    /// Pgwire's `Parse` (P) message lands here.
1406    pub fn prepare(&self, sql: &str) -> Result<Statement, ParseError> {
1407        let mut stmt = parser::parse_statement(sql)?;
1408        let now_micros = self.clock.map(|f| f());
1409        rewrite_clock_calls(&mut stmt, now_micros);
1410        if let Statement::Select(s) = &mut stmt {
1411            // v6.4.1 — expand `GROUP BY ALL` to every non-aggregate
1412            // SELECT-list item BEFORE position / alias resolution so
1413            // downstream passes see the explicit list.
1414            expand_group_by_all(s);
1415            resolve_order_by_position(s);
1416            // v6.2.3 — cost-based JOIN reorder. No-op for
1417            // single-table FROMs or any non-INNER join shape.
1418            reorder::reorder_joins(s, &self.catalog, &self.statistics);
1419        }
1420        Ok(stmt)
1421    }
1422
1423    /// v6.3.0 — cached prepare. Returns a cloned `Statement` from
1424    /// the plan cache on hit, runs the full `prepare()` path on miss
1425    /// and inserts the resulting plan before returning. Skipping the
1426    /// parse + JOIN-reorder pipeline on hit is the dominant win for
1427    /// JDBC / sqlx / pgx clients that reuse the same SQL string.
1428    ///
1429    /// Returns a cloned `Statement` (not a borrow) because the
1430    /// pgwire layer owns its `PreparedStmt` map per-session and the
1431    /// engine-level cache must stay available for other sessions.
1432    /// Clone cost on a 5-table JOIN AST is well under the parse cost
1433    /// it replaces.
1434    pub fn prepare_cached(&mut self, sql: &str) -> Result<Statement, ParseError> {
1435        // v6.3.1 — version-aware lookup. If the cached plan was
1436        // prepared before the most recent ANALYZE, evict and replan.
1437        let current_version = self.statistics.version();
1438        if let Some(plan) = self.plan_cache.get(sql) {
1439            if plan.statistics_version == current_version {
1440                return Ok(plan.stmt.clone());
1441            }
1442            // Stale entry — fall through to evict + re-prepare.
1443        }
1444        self.plan_cache.evict(sql);
1445        let stmt = self.prepare(sql)?;
1446        let source_tables = plan_cache::collect_source_tables(&stmt);
1447        let plan = plan_cache::PreparedPlan {
1448            stmt: stmt.clone(),
1449            statistics_version: current_version,
1450            source_tables,
1451            describe_columns: alloc::vec::Vec::new(),
1452        };
1453        self.plan_cache.insert(String::from(sql), plan);
1454        Ok(stmt)
1455    }
1456
1457    /// v6.3.0 — read-only accessor for tests and v6.3.1 invalidation.
1458    pub fn plan_cache(&self) -> &plan_cache::PlanCache {
1459        &self.plan_cache
1460    }
1461
1462    /// v6.3.0 — mutable accessor for v6.3.1 invalidation hooks.
1463    pub fn plan_cache_mut(&mut self) -> &mut plan_cache::PlanCache {
1464        &mut self.plan_cache
1465    }
1466
1467    /// v6.3.3 — Describe a prepared `Statement` without executing.
1468    /// Returns `(parameter_oids, output_columns)`. Empty
1469    /// `output_columns` means the statement has no row-producing
1470    /// shape we could resolve here (JOIN, subquery, non-SELECT, …)
1471    /// — pgwire layer maps that to a `NoData` reply.
1472    pub fn describe_prepared(&self, stmt: &Statement) -> (Vec<u32>, Vec<ColumnSchema>) {
1473        describe::describe_prepared(stmt, self.active_catalog())
1474    }
1475
1476    /// v6.1.1 — execute a [`Statement`] previously returned by
1477    /// [`Engine::prepare`], substituting `Expr::Placeholder(n)`
1478    /// nodes for the corresponding [`Value`] in `params` (1-based
1479    /// per PG: `$1` → `params[0]`). Bind-time string parameters
1480    /// are decoded into typed `Value`s by the pgwire layer before
1481    /// this call so the resulting AST hits the same execution
1482    /// path as a simple query — no SQL re-parse.
1483    ///
1484    /// Pgwire's `Execute` (E) message after a `Bind` (B) lands here.
1485    pub fn execute_prepared(
1486        &mut self,
1487        mut stmt: Statement,
1488        params: &[Value],
1489    ) -> Result<QueryResult, EngineError> {
1490        substitute_placeholders(&mut stmt, params)?;
1491        // v7.16.0 — set `current_tx` for the duration of the
1492        // dispatch so the `exec_*` helpers see the right TX
1493        // slot (matches what `execute_in_with_cancel` does for
1494        // simple-query). Pre-v7.16 the simple-query path
1495        // worked because every public entry point routed
1496        // through `execute_in_with_cancel`; the prepared path
1497        // skipped the wrap and so its INSERTs/UPDATEs landed
1498        // in the no-tx default slot, silently invisible to a
1499        // BEGIN/COMMIT-bracketed flow. Caught by spg-sqlx's
1500        // first transaction-visibility test.
1501        let saved = self.current_tx;
1502        self.current_tx = Some(IMPLICIT_TX);
1503        let result = self.execute_stmt_with_cancel(stmt, CancelToken::none());
1504        self.current_tx = saved;
1505        result
1506    }
1507
1508    fn execute_inner_with_cancel(
1509        &mut self,
1510        sql: &str,
1511        cancel: CancelToken<'_>,
1512    ) -> Result<QueryResult, EngineError> {
1513        cancel.check()?;
1514        let stmt = self.prepare(sql)?;
1515        // v6.5.1 — wrap the executor with a wall-clock window so we
1516        // can record into spg_stat_query. Skip when the engine has
1517        // no clock attached (no_std embedded callers).
1518        let start_us = self.clock.map(|f| f());
1519        let result = self.execute_stmt_with_cancel(stmt, cancel);
1520        if let (Some(t0), Ok(_)) = (start_us, &result) {
1521            let now = self.clock.map_or(t0, |f| f());
1522            let elapsed = now.saturating_sub(t0).max(0) as u64;
1523            self.query_stats.record(sql, elapsed, now as u64);
1524            // v6.5.6 — slow-query log: fire callback when elapsed
1525            // exceeds the configured floor.
1526            if let (Some(threshold), Some(logger)) =
1527                (self.slow_query_threshold_us, self.slow_query_logger)
1528                && elapsed >= threshold
1529            {
1530                logger(sql, elapsed);
1531            }
1532        }
1533        result
1534    }
1535
1536    fn execute_stmt_with_cancel(
1537        &mut self,
1538        stmt: Statement,
1539        cancel: CancelToken<'_>,
1540    ) -> Result<QueryResult, EngineError> {
1541        cancel.check()?;
1542        let result = match stmt {
1543            Statement::CreateTable(s) => self.exec_create_table(s),
1544            // v7.9.15 — CREATE EXTENSION is a no-op on SPG. Returns
1545            // CommandOk with affected=0; modified_catalog=false so
1546            // the WAL doesn't grow a useless entry. mailrs F3.
1547            Statement::CreateExtension(_) => Ok(QueryResult::CommandOk {
1548                affected: 0,
1549                modified_catalog: false,
1550            }),
1551            // v7.9.27 — DO $$ ... $$ is also a no-op (SPG has no
1552            // PL/pgSQL). mailrs H1 + pg_dump compat.
1553            Statement::DoBlock => Ok(QueryResult::CommandOk {
1554                affected: 0,
1555                modified_catalog: false,
1556            }),
1557            // v7.14.0 — empty-statement no-op for pg_dump /
1558            // mysqldump preamble lines that collapse to nothing
1559            // after comment-stripping.
1560            Statement::Empty => Ok(QueryResult::CommandOk {
1561                affected: 0,
1562                modified_catalog: false,
1563            }),
1564            Statement::DropTable { names, if_exists } => self.exec_drop_table(names, if_exists),
1565            Statement::DropIndex { name, if_exists } => self.exec_drop_index(name, if_exists),
1566            Statement::CreateIndex(s) => self.exec_create_index(s),
1567            Statement::Insert(s) => self.exec_insert(s),
1568            Statement::Update(s) => self.exec_update_cancel(&s, cancel),
1569            Statement::Delete(s) => self.exec_delete_cancel(&s, cancel),
1570            Statement::Select(s) => self.exec_select_cancel(&s, cancel),
1571            Statement::Begin => self.exec_begin(),
1572            Statement::Commit => self.exec_commit(),
1573            Statement::Rollback => self.exec_rollback(),
1574            Statement::Savepoint(name) => self.exec_savepoint(name),
1575            Statement::RollbackToSavepoint(name) => self.exec_rollback_to_savepoint(&name),
1576            Statement::ReleaseSavepoint(name) => self.exec_release_savepoint(&name),
1577            Statement::ShowTables => Ok(self.exec_show_tables()),
1578            Statement::ShowColumns(table) => self.exec_show_columns(&table),
1579            Statement::ShowUsers => Ok(self.exec_show_users()),
1580            Statement::ShowPublications => Ok(self.exec_show_publications()),
1581            Statement::ShowSubscriptions => Ok(self.exec_show_subscriptions()),
1582            Statement::CreateUser(s) => self.exec_create_user(&s),
1583            Statement::DropUser(name) => self.exec_drop_user(&name),
1584            Statement::Explain(e) => self.exec_explain(&e, cancel),
1585            Statement::AlterIndex(s) => self.exec_alter_index(s),
1586            Statement::AlterTable(s) => self.exec_alter_table(s),
1587            Statement::CreatePublication(s) => self.exec_create_publication(s),
1588            Statement::DropPublication(name) => self.exec_drop_publication(&name),
1589            Statement::CreateSubscription(s) => self.exec_create_subscription(s),
1590            Statement::DropSubscription(name) => self.exec_drop_subscription(&name),
1591            // v6.1.7 — WAIT FOR WAL POSITION needs `lag_state`,
1592            // which lives in spg-server's ServerState. The engine
1593            // surfaces a clear error; the server-layer dispatch
1594            // intercepts the SQL before it reaches the engine on
1595            // a server build, so this arm only fires for
1596            // engine-only callers (spg-embedded, lib tests).
1597            Statement::WaitForWalPosition { .. } => Err(EngineError::Unsupported(
1598                "WAIT FOR WAL POSITION must be handled by the server layer".into(),
1599            )),
1600            // v6.2.0 — ANALYZE recomputes per-column histograms.
1601            Statement::Analyze(target) => self.exec_analyze(target.as_deref()),
1602            // v6.7.3 — COMPACT COLD SEGMENTS.
1603            Statement::CompactColdSegments => self.exec_compact_cold_segments(),
1604            // v7.12.1 — SET / RESET session parameter. Engine
1605            // tracks the value in `session_params`; FTS dispatcher
1606            // reads `default_text_search_config`. Everything else
1607            // is a recorded no-op (PG dump compat).
1608            Statement::SetParameter { name, value } => {
1609                self.set_session_param(name, value);
1610                Ok(QueryResult::CommandOk {
1611                    affected: 0,
1612                    modified_catalog: false,
1613                })
1614            }
1615            // v7.14.0 — MySQL multi-assignment SET. Each pair runs
1616            // through `set_session_param` so engine-known params
1617            // (FOREIGN_KEY_CHECKS, session_replication_role, …) take
1618            // effect; unknown pairs (including `@VAR` LHS from the
1619            // mysqldump preamble) are recorded then ignored.
1620            Statement::SetParameterList(pairs) => {
1621                for (name, value) in pairs {
1622                    self.set_session_param(name, value);
1623                }
1624                Ok(QueryResult::CommandOk {
1625                    affected: 0,
1626                    modified_catalog: false,
1627                })
1628            }
1629            // v7.12.4 — CREATE FUNCTION / CREATE TRIGGER / DROP …
1630            // for the PL/pgSQL trigger surface. exec_* methods are
1631            // defined alongside the existing CREATE handlers below.
1632            Statement::CreateFunction(s) => self.exec_create_function(s),
1633            Statement::CreateTrigger(s) => self.exec_create_trigger(s),
1634            Statement::DropTrigger {
1635                name,
1636                table,
1637                if_exists,
1638            } => self.exec_drop_trigger(&name, &table, if_exists),
1639            Statement::DropFunction { name, if_exists } => {
1640                self.exec_drop_function(&name, if_exists)
1641            }
1642            Statement::ResetParameter(target) => {
1643                match target {
1644                    None => self.session_params.clear(),
1645                    Some(name) => {
1646                        self.session_params.remove(&name.to_ascii_lowercase());
1647                    }
1648                }
1649                Ok(QueryResult::CommandOk {
1650                    affected: 0,
1651                    modified_catalog: false,
1652                })
1653            }
1654        };
1655        self.enforce_row_limit(result)
1656    }
1657
1658    /// v6.1.2 — `CREATE PUBLICATION` runtime path. Duplicate names
1659    /// surface as `EngineError::Unsupported` so the existing PG-wire
1660    /// error mapping stays uniform; the message carries the name so
1661    /// operators can grep replication-log noise. Inside-transaction
1662    /// invocation is rejected (matches `CREATE USER` / `DROP USER`
1663    /// stance) — replication-catalog mutation is a connection-level
1664    /// administrative op, not a transactional one.
1665    fn exec_create_publication(
1666        &mut self,
1667        s: CreatePublicationStatement,
1668    ) -> Result<QueryResult, EngineError> {
1669        // v6.1.4 — the v6.1.2 "no DDL inside a transaction" guard
1670        // was over-cautious: it also blocked the auto-commit wrap
1671        // path (which begins an internal TX around every WAL-
1672        // logged statement). PG itself allows CREATE PUBLICATION
1673        // inside a transaction (it rolls back with the TX).
1674        self.publications
1675            .create(s.name, s.scope)
1676            .map_err(|e| EngineError::Unsupported(alloc::format!("CREATE PUBLICATION: {e:?}")))?;
1677        Ok(QueryResult::CommandOk {
1678            affected: 1,
1679            modified_catalog: true,
1680        })
1681    }
1682
1683    /// v6.1.2 — `DROP PUBLICATION` runtime path. PG-compatible silent
1684    /// no-op when the publication doesn't exist (returns `affected=0`
1685    /// in that case so the wire-level command tag distinguishes
1686    /// "dropped" from "no-op", though both succeed).
1687    fn exec_drop_publication(&mut self, name: &str) -> Result<QueryResult, EngineError> {
1688        let removed = self.publications.drop(name);
1689        Ok(QueryResult::CommandOk {
1690            affected: usize::from(removed),
1691            modified_catalog: removed,
1692        })
1693    }
1694
1695    /// v6.1.2 — read access to the publication catalog. Used by
1696    /// the v6.1.5 publisher-side WAL filter, by `SHOW PUBLICATIONS`
1697    /// (v6.1.3+), and by e2e tests that need to assert state without
1698    /// going through the wire.
1699    pub const fn publications(&self) -> &publications::Publications {
1700        &self.publications
1701    }
1702
1703    /// v6.1.4 — `CREATE SUBSCRIPTION` runtime path. Defaults
1704    /// `enabled = true` and `last_received_pos = 0` for a freshly-
1705    /// created subscription. The actual worker thread is spawned
1706    /// by spg-server once the engine returns success.
1707    fn exec_create_subscription(
1708        &mut self,
1709        s: CreateSubscriptionStatement,
1710    ) -> Result<QueryResult, EngineError> {
1711        // See exec_create_publication — the in_transaction gate
1712        // was over-cautious; the auto-commit wrap path holds an
1713        // internal TX that this check was incorrectly blocking.
1714        let sub = subscriptions::Subscription {
1715            conn_str: s.conn_str,
1716            publications: s.publications,
1717            enabled: true,
1718            last_received_pos: 0,
1719        };
1720        self.subscriptions
1721            .create(s.name, sub)
1722            .map_err(|e| EngineError::Unsupported(alloc::format!("CREATE SUBSCRIPTION: {e:?}")))?;
1723        Ok(QueryResult::CommandOk {
1724            affected: 1,
1725            modified_catalog: true,
1726        })
1727    }
1728
1729    /// v6.1.4 — `DROP SUBSCRIPTION`. Silent no-op when the name
1730    /// doesn't exist (PG-compatible). The associated worker is
1731    /// torn down by spg-server when it observes the catalog
1732    /// change at the next snapshot or via the engine's
1733    /// subscriptions accessor (the worker polls the catalog on
1734    /// reconnect; v6.1.5's filter-side will tighten this to an
1735    /// explicit signal).
1736    fn exec_drop_subscription(&mut self, name: &str) -> Result<QueryResult, EngineError> {
1737        let removed = self.subscriptions.drop(name);
1738        Ok(QueryResult::CommandOk {
1739            affected: usize::from(removed),
1740            modified_catalog: removed,
1741        })
1742    }
1743
1744    /// v6.1.4 — read access to the subscription catalog. Used by
1745    /// the subscription worker (read its own row to find its
1746    /// publications + last applied position), by SHOW SUBSCRIPTIONS,
1747    /// and by e2e tests asserting state directly.
1748    pub const fn subscriptions(&self) -> &subscriptions::Subscriptions {
1749        &self.subscriptions
1750    }
1751
1752    /// v6.1.4 — write access to `last_received_pos`. Worker
1753    /// calls this after each apply batch (under the engine's
1754    /// write-lock). Returns `false` when the subscription was
1755    /// dropped between when the worker received the record and
1756    /// when this call landed.
1757    pub fn subscription_advance(&mut self, name: &str, pos: u64) -> bool {
1758        self.subscriptions.update_last_received_pos(name, pos)
1759    }
1760
1761    /// v6.1.4 — `SHOW SUBSCRIPTIONS` row materialisation. Returns
1762    /// `(name, conn_str, publications, enabled, last_received_pos)`
1763    /// ordered by subscription name. The `publications` column is
1764    /// the comma-joined list ("p1, p2") for ergonomic SHOW output;
1765    /// callers wanting structured access read `Engine::subscriptions`.
1766    fn exec_show_subscriptions(&self) -> QueryResult {
1767        let columns = alloc::vec![
1768            ColumnSchema::new("name", DataType::Text, false),
1769            ColumnSchema::new("conn_str", DataType::Text, false),
1770            ColumnSchema::new("publications", DataType::Text, false),
1771            ColumnSchema::new("enabled", DataType::Bool, false),
1772            ColumnSchema::new("last_received_pos", DataType::BigInt, false),
1773        ];
1774        let rows: Vec<Row> = self
1775            .subscriptions
1776            .iter()
1777            .map(|(name, sub)| {
1778                Row::new(alloc::vec![
1779                    Value::Text(name.clone()),
1780                    Value::Text(sub.conn_str.clone()),
1781                    Value::Text(sub.publications.join(", ")),
1782                    Value::Bool(sub.enabled),
1783                    Value::BigInt(i64::try_from(sub.last_received_pos).unwrap_or(i64::MAX)),
1784                ])
1785            })
1786            .collect();
1787        QueryResult::Rows { columns, rows }
1788    }
1789
1790    /// v6.2.0 — materialise `spg_statistic` rows. One row per
1791    /// `(table, column)` pair tracked in `Statistics`, with
1792    /// `histogram_bounds` rendered as a `[v0, v1, ...]` string —
1793    /// the same canonical form vector literals use for round-trip.
1794    fn exec_spg_statistic(&self) -> QueryResult {
1795        let columns = alloc::vec![
1796            ColumnSchema::new("table_name", DataType::Text, false),
1797            ColumnSchema::new("column_name", DataType::Text, false),
1798            ColumnSchema::new("null_frac", DataType::Float, false),
1799            ColumnSchema::new("n_distinct", DataType::BigInt, false),
1800            ColumnSchema::new("histogram_bounds", DataType::Text, false),
1801            // v6.7.0 — appended column (v6.2.0 stability contract
1802            // allows APPEND to spg_statistic, not reorder/rename).
1803            // Reports the cached per-table cold-row count; same
1804            // value across every column row of the same table.
1805            ColumnSchema::new("cold_row_count", DataType::BigInt, false),
1806        ];
1807        let rows: Vec<Row> = self
1808            .statistics
1809            .iter()
1810            .map(|((t, c), s)| {
1811                let cold = self
1812                    .catalog
1813                    .get(t)
1814                    .map_or(0, |table| table.cold_row_count());
1815                Row::new(alloc::vec![
1816                    Value::Text(t.clone()),
1817                    Value::Text(c.clone()),
1818                    Value::Float(f64::from(s.null_frac)),
1819                    Value::BigInt(i64::try_from(s.n_distinct).unwrap_or(i64::MAX)),
1820                    Value::Text(render_histogram_bounds(&s.histogram_bounds)),
1821                    Value::BigInt(i64::try_from(cold).unwrap_or(i64::MAX)),
1822                ])
1823            })
1824            .collect();
1825        QueryResult::Rows { columns, rows }
1826    }
1827
1828    /// v6.5.0 — materialise `spg_stat_replication` rows. One row
1829    /// per subscription with `(name, conn_str, publications,
1830    /// last_received_pos, enabled)`. Surface mirrors
1831    /// `SHOW SUBSCRIPTIONS` but follows the virtual-table dispatch
1832    /// shape so it composes with SELECT clauses (WHERE, projection
1833    /// onto specific columns, etc).
1834    fn exec_spg_stat_replication(&self) -> QueryResult {
1835        let columns = alloc::vec![
1836            ColumnSchema::new("name", DataType::Text, false),
1837            ColumnSchema::new("conn_str", DataType::Text, false),
1838            ColumnSchema::new("publications", DataType::Text, false),
1839            ColumnSchema::new("last_received_pos", DataType::BigInt, false),
1840            ColumnSchema::new("enabled", DataType::Bool, false),
1841        ];
1842        let rows: Vec<Row> = self
1843            .subscriptions
1844            .iter()
1845            .map(|(name, sub)| {
1846                Row::new(alloc::vec![
1847                    Value::Text(name.clone()),
1848                    Value::Text(sub.conn_str.clone()),
1849                    Value::Text(sub.publications.join(",")),
1850                    Value::BigInt(i64::try_from(sub.last_received_pos).unwrap_or(i64::MAX)),
1851                    Value::Bool(sub.enabled),
1852                ])
1853            })
1854            .collect();
1855        QueryResult::Rows { columns, rows }
1856    }
1857
1858    /// v6.5.0 — materialise `spg_stat_segment` rows. One row per
1859    /// cold-tier segment with `(segment_id, num_rows, num_pages,
1860    /// total_bytes)`.
1861    ///
1862    /// v6.7.0 — appended `table_name` column resolves the v6.5.0
1863    /// carve-out. Walks every user table's BTree indices to find
1864    /// which table's Cold locators point at each segment. Empty
1865    /// string for orphan segments (loaded via SPG_PRELOAD_COLD_SEGMENT
1866    /// before any index registered a locator). The walk is
1867    /// O(tables × indices × keys); cached per call, not across
1868    /// calls — re-walked on every `SELECT * FROM spg_stat_segment`.
1869    fn exec_spg_stat_segment(&self) -> QueryResult {
1870        let columns = alloc::vec![
1871            ColumnSchema::new("segment_id", DataType::BigInt, false),
1872            ColumnSchema::new("table_name", DataType::Text, false),
1873            ColumnSchema::new("num_rows", DataType::BigInt, false),
1874            ColumnSchema::new("num_pages", DataType::BigInt, false),
1875            ColumnSchema::new("total_bytes", DataType::BigInt, false),
1876        ];
1877        // v6.7.0 — build a segment_id → table_name map by walking
1878        // every user table's BTree indices once. O(tables × indices
1879        // × keys) for the v6.5.0 carve-out resolution; acceptable
1880        // because spg_stat_segment is operator-facing (not on a
1881        // hot-loop path).
1882        let mut segment_owners: alloc::collections::BTreeMap<u32, String> = BTreeMap::new();
1883        for tname in self.catalog.table_names() {
1884            if is_internal_table_name(&tname) {
1885                continue;
1886            }
1887            let Some(t) = self.catalog.get(&tname) else {
1888                continue;
1889            };
1890            for idx in t.indices() {
1891                if let spg_storage::IndexKind::BTree(map) = &idx.kind {
1892                    for (_, locs) in map.iter() {
1893                        for loc in locs {
1894                            if let spg_storage::RowLocator::Cold { segment_id, .. } = loc {
1895                                segment_owners
1896                                    .entry(*segment_id)
1897                                    .or_insert_with(|| tname.clone());
1898                            }
1899                        }
1900                    }
1901                }
1902            }
1903        }
1904        let rows: Vec<Row> = self
1905            .catalog
1906            .cold_segment_ids_global()
1907            .iter()
1908            .filter_map(|&id| {
1909                let seg = self.catalog.cold_segment(id)?;
1910                let meta = seg.meta();
1911                let owner = segment_owners.get(&id).cloned().unwrap_or_default();
1912                Some(Row::new(alloc::vec![
1913                    Value::BigInt(i64::from(id)),
1914                    Value::Text(owner),
1915                    Value::BigInt(i64::try_from(meta.num_rows).unwrap_or(i64::MAX)),
1916                    Value::BigInt(i64::from(meta.num_pages)),
1917                    Value::BigInt(i64::try_from(meta.total_bytes).unwrap_or(i64::MAX)),
1918                ]))
1919            })
1920            .collect();
1921        QueryResult::Rows { columns, rows }
1922    }
1923
1924    /// v6.5.1 — materialise `spg_stat_query` rows. One row per
1925    /// distinct SQL text recorded since the engine booted, capped
1926    /// at `QUERY_STATS_MAX` (1024). Columns:
1927    ///   sql, exec_count, total_us, mean_us, max_us, last_seen_us
1928    /// mean_us = total_us / exec_count (saturating).
1929    fn exec_spg_stat_query(&self) -> QueryResult {
1930        let columns = alloc::vec![
1931            ColumnSchema::new("sql", DataType::Text, false),
1932            ColumnSchema::new("exec_count", DataType::BigInt, false),
1933            ColumnSchema::new("total_us", DataType::BigInt, false),
1934            ColumnSchema::new("mean_us", DataType::BigInt, false),
1935            ColumnSchema::new("max_us", DataType::BigInt, false),
1936            ColumnSchema::new("last_seen_us", DataType::BigInt, false),
1937        ];
1938        let rows: Vec<Row> = self
1939            .query_stats
1940            .snapshot()
1941            .into_iter()
1942            .map(|(sql, s)| {
1943                let mean = if s.exec_count == 0 {
1944                    0
1945                } else {
1946                    s.total_us / s.exec_count
1947                };
1948                Row::new(alloc::vec![
1949                    Value::Text(sql),
1950                    Value::BigInt(i64::try_from(s.exec_count).unwrap_or(i64::MAX)),
1951                    Value::BigInt(i64::try_from(s.total_us).unwrap_or(i64::MAX)),
1952                    Value::BigInt(i64::try_from(mean).unwrap_or(i64::MAX)),
1953                    Value::BigInt(i64::try_from(s.max_us).unwrap_or(i64::MAX)),
1954                    Value::BigInt(i64::try_from(s.last_seen_us).unwrap_or(i64::MAX)),
1955                ])
1956            })
1957            .collect();
1958        QueryResult::Rows { columns, rows }
1959    }
1960
1961    /// v6.5.2 — register a connection-state provider. spg-server
1962    /// calls this at startup with a function that snapshots its
1963    /// per-pgwire-connection registry. Engine reads through the
1964    /// callback on `SELECT * FROM spg_stat_activity`.
1965    #[must_use]
1966    pub const fn with_activity_provider(mut self, f: ActivityProvider) -> Self {
1967        self.activity_provider = Some(f);
1968        self
1969    }
1970
1971    /// v6.5.3 — register audit chain provider + verifier.
1972    #[must_use]
1973    pub const fn with_audit_providers(
1974        mut self,
1975        chain: AuditChainProvider,
1976        verify: AuditVerifier,
1977    ) -> Self {
1978        self.audit_chain_provider = Some(chain);
1979        self.audit_verifier = Some(verify);
1980        self
1981    }
1982
1983    /// v6.5.6 — register a slow-query log callback. `threshold_us`
1984    /// is the floor (in microseconds); only executes above the floor
1985    /// fire the callback. spg-server wires this from
1986    /// `SPG_SLOW_QUERY_THRESHOLD_MS` (default 100 ms).
1987    #[must_use]
1988    pub const fn with_slow_query_log(mut self, threshold_us: u64, logger: SlowQueryLogger) -> Self {
1989        self.slow_query_threshold_us = Some(threshold_us);
1990        self.slow_query_logger = Some(logger);
1991        self
1992    }
1993
1994    /// v6.5.6 — operator knob for plan cache cap. spg-server reads
1995    /// `SPG_PLAN_CACHE_MAX` env at startup; uses this to override
1996    /// the compile-time default of 256.
1997    pub fn set_plan_cache_max(&mut self, n: usize) {
1998        self.plan_cache.set_max_entries(n);
1999    }
2000
2001    /// v6.5.2 — materialise `spg_stat_activity` rows. Pulls a fresh
2002    /// snapshot from the registered `ActivityProvider`. Returns an
2003    /// empty result set when no provider is registered (the no_std
2004    /// embedded path with no pgwire layer).
2005    fn exec_spg_stat_activity(&self) -> QueryResult {
2006        let columns = alloc::vec![
2007            ColumnSchema::new("pid", DataType::Int, false),
2008            ColumnSchema::new("user", DataType::Text, false),
2009            ColumnSchema::new("started_at_us", DataType::BigInt, false),
2010            ColumnSchema::new("current_sql", DataType::Text, false),
2011            ColumnSchema::new("wait_event", DataType::Text, false),
2012            ColumnSchema::new("elapsed_us", DataType::BigInt, false),
2013            ColumnSchema::new("in_transaction", DataType::Bool, false),
2014        ];
2015        let rows: Vec<Row> = self
2016            .activity_provider
2017            .map(|f| f())
2018            .unwrap_or_default()
2019            .into_iter()
2020            .map(|r| {
2021                Row::new(alloc::vec![
2022                    Value::Int(i32::try_from(r.pid).unwrap_or(i32::MAX)),
2023                    Value::Text(r.user),
2024                    Value::BigInt(r.started_at_us),
2025                    Value::Text(r.current_sql),
2026                    Value::Text(r.wait_event),
2027                    Value::BigInt(r.elapsed_us),
2028                    Value::Bool(r.in_transaction),
2029                ])
2030            })
2031            .collect();
2032        QueryResult::Rows { columns, rows }
2033    }
2034
2035    /// v6.5.4 — materialise `spg_table_ddl` rows. One row per user
2036    /// table with `(table_name, ddl)`. Reconstructed from catalog
2037    /// state on demand.
2038    fn exec_spg_table_ddl(&self) -> QueryResult {
2039        let columns = alloc::vec![
2040            ColumnSchema::new("table_name", DataType::Text, false),
2041            ColumnSchema::new("ddl", DataType::Text, false),
2042        ];
2043        let rows: Vec<Row> = self
2044            .catalog
2045            .table_names()
2046            .into_iter()
2047            .filter(|n| !is_internal_table_name(n))
2048            .filter_map(|name| {
2049                let table = self.catalog.get(&name)?;
2050                let ddl = render_create_table(&name, &table.schema().columns);
2051                Some(Row::new(alloc::vec![Value::Text(name), Value::Text(ddl),]))
2052            })
2053            .collect();
2054        QueryResult::Rows { columns, rows }
2055    }
2056
2057    /// v6.5.4 — materialise `spg_role_ddl` rows. One row per user
2058    /// with `(role_name, ddl)`. Password is redacted (matches the
2059    /// `Statement::CreateUser` Display which prints `'<redacted>'`).
2060    fn exec_spg_role_ddl(&self) -> QueryResult {
2061        let columns = alloc::vec![
2062            ColumnSchema::new("role_name", DataType::Text, false),
2063            ColumnSchema::new("ddl", DataType::Text, false),
2064        ];
2065        let rows: Vec<Row> = self
2066            .users
2067            .iter()
2068            .map(|(name, rec)| {
2069                let ddl = alloc::format!(
2070                    "CREATE USER {name} WITH PASSWORD '<redacted>' ROLE '{}'",
2071                    rec.role.as_str(),
2072                );
2073                Row::new(alloc::vec![
2074                    Value::Text(String::from(name)),
2075                    Value::Text(ddl)
2076                ])
2077            })
2078            .collect();
2079        QueryResult::Rows { columns, rows }
2080    }
2081
2082    /// v6.5.4 — materialise `spg_database_ddl`: single row whose
2083    /// `ddl` column concatenates every user table's CREATE +
2084    /// every role's CREATE in deterministic catalog order. Suitable
2085    /// for piping back through `Engine::execute` to recreate a
2086    /// schema-equivalent database.
2087    fn exec_spg_database_ddl(&self) -> QueryResult {
2088        let columns = alloc::vec![ColumnSchema::new("ddl", DataType::Text, false)];
2089        let mut out = String::new();
2090        for (name, rec) in self.users.iter() {
2091            out.push_str(&alloc::format!(
2092                "CREATE USER {name} WITH PASSWORD '<redacted>' ROLE '{}';\n",
2093                rec.role.as_str(),
2094            ));
2095        }
2096        for name in self.catalog.table_names() {
2097            if is_internal_table_name(&name) {
2098                continue;
2099            }
2100            if let Some(table) = self.catalog.get(&name) {
2101                out.push_str(&render_create_table(&name, &table.schema().columns));
2102                out.push_str(";\n");
2103            }
2104        }
2105        QueryResult::Rows {
2106            columns,
2107            rows: alloc::vec![Row::new(alloc::vec![Value::Text(out)])],
2108        }
2109    }
2110
2111    /// v6.5.3 — materialise `spg_audit_chain` rows. Pulls a fresh
2112    /// snapshot from the registered provider; empty when no
2113    /// provider is set.
2114    fn exec_spg_audit_chain(&self) -> QueryResult {
2115        let columns = alloc::vec![
2116            ColumnSchema::new("seq", DataType::BigInt, false),
2117            ColumnSchema::new("ts_ms", DataType::BigInt, false),
2118            ColumnSchema::new("prev_hash", DataType::Text, false),
2119            ColumnSchema::new("entry_hash", DataType::Text, false),
2120            ColumnSchema::new("sql", DataType::Text, false),
2121        ];
2122        let rows: Vec<Row> = self
2123            .audit_chain_provider
2124            .map(|f| f())
2125            .unwrap_or_default()
2126            .into_iter()
2127            .map(|r| {
2128                Row::new(alloc::vec![
2129                    Value::BigInt(r.seq),
2130                    Value::BigInt(r.ts_ms),
2131                    Value::Text(r.prev_hash_hex),
2132                    Value::Text(r.entry_hash_hex),
2133                    Value::Text(r.sql),
2134                ])
2135            })
2136            .collect();
2137        QueryResult::Rows { columns, rows }
2138    }
2139
    /// v6.5.3 — materialise `spg_audit_verify` single-row result.
    /// `(verified_count, broken_at_seq)` — broken_at_seq is `-1`
    /// on a clean chain. When no verifier is registered the row is
    /// `(0, -1)`: nothing verified and no break reported (no-data
    /// fallback for embedded callers).
    ///
    /// NOTE(review): this comment previously claimed both values
    /// are 0 in the no-verifier case; the code has `-1` for
    /// `broken_at_seq`. Confirm which one is intended.
    fn exec_spg_audit_verify(&self) -> QueryResult {
        let columns = alloc::vec![
            ColumnSchema::new("verified_count", DataType::BigInt, false),
            ColumnSchema::new("broken_at_seq", DataType::BigInt, false),
        ];
        // Fallback tuple is used only when `audit_verifier` is `None`.
        let (verified, broken) = self.audit_verifier.map(|f| f()).unwrap_or((0, -1));
        let row = Row::new(alloc::vec![Value::BigInt(verified), Value::BigInt(broken),]);
        QueryResult::Rows {
            columns,
            rows: alloc::vec![row],
        }
    }
2157
    /// v6.5.1 — shared (read-only) borrow of the engine's
    /// `QueryStats`. Used by tests and by the v6.5.6 ops reset
    /// paths.
    pub fn query_stats(&self) -> &query_stats::QueryStats {
        &self.query_stats
    }
2162
    /// v6.5.1 — exclusive (mutable) borrow of the engine's
    /// `QueryStats`, for callers that need to modify it (clear,
    /// etc).
    pub fn query_stats_mut(&mut self) -> &mut query_stats::QueryStats {
        &mut self.query_stats
    }
2167
    /// v6.2.0 — read access to the per-column statistics table.
    /// Used by the planner (v6.2.2 selectivity functions read this),
    /// by `SELECT * FROM spg_statistic`, and by e2e tests.
    ///
    /// Returns a shared borrow of the engine's `Statistics`; nothing
    /// is copied.
    pub const fn statistics(&self) -> &statistics::Statistics {
        &self.statistics
    }
2174
2175    /// v6.2.1 — return tables whose modified-row count crossed the
2176    /// auto-analyze threshold since the last ANALYZE on that table.
2177    /// The threshold is `0.1 × max(row_count, MIN_ROWS_FOR_AUTO_
2178    /// ANALYZE)` — combines PG-style fractional + absolute lower
2179    /// bound so a fresh / tiny table doesn't get hammered on every
2180    /// INSERT.
2181    ///
2182    /// Designed to be cheap: walks every user table's
2183    /// `Catalog::table_names()` + reads `statistics::modified_
2184    /// since_last_analyze()` (BTreeMap lookup). The background
2185    /// worker calls this under `engine.read()` then drops the lock
2186    /// before re-acquiring `engine.write()` for the actual ANALYZE.
2187    pub fn tables_needing_analyze(&self) -> Vec<String> {
2188        const MIN_ROWS: u64 = 100;
2189        let mut out = Vec::new();
2190        for name in self.catalog.table_names() {
2191            if is_internal_table_name(&name) {
2192                continue;
2193            }
2194            let Some(table) = self.catalog.get(&name) else {
2195                continue;
2196            };
2197            let row_count = table.rows().len() as u64;
2198            let modified = self.statistics.modified_since_last_analyze(&name);
2199            // Threshold: ceil(0.1 × max(row_count, MIN_ROWS)),
2200            // computed in integer arithmetic so spg-engine stays
2201            // no_std without pulling in libm. `(n + 9) / 10` is
2202            // `ceil(n / 10)` for non-negative `n`.
2203            let base = row_count.max(MIN_ROWS);
2204            let threshold = base.saturating_add(9) / 10;
2205            if modified >= threshold {
2206                out.push(name);
2207            }
2208        }
2209        out
2210    }
2211
2212    /// v6.2.0 — `ANALYZE [<table>]` runtime. Bare `ANALYZE` walks
2213    /// every user table; `ANALYZE <name>` re-stats one. For each
2214    /// target table, single-pass scan + per-column histogram +
2215    /// `null_frac` + `n_distinct`. Replaces the table's prior
2216    /// stats; resets the modified-row counter.
2217    ///
2218    /// v6.2.0 doesn't sample — it scans the full table. v6.2.x
2219    /// can add reservoir sampling at the > 100 K-row mark; not a
2220    /// scope blocker for the current commit since rows ≤ 100 K
2221    /// analyse in milliseconds.
2222    fn exec_analyze(&mut self, target: Option<&str>) -> Result<QueryResult, EngineError> {
2223        let names: Vec<String> = if let Some(name) = target {
2224            // Verify the table exists; surface a clear error if not.
2225            if self.catalog.get(name).is_none() {
2226                return Err(EngineError::Storage(StorageError::TableNotFound {
2227                    name: name.to_string(),
2228                }));
2229            }
2230            alloc::vec![name.to_string()]
2231        } else {
2232            self.catalog
2233                .table_names()
2234                .into_iter()
2235                .filter(|n| !is_internal_table_name(n))
2236                .collect()
2237        };
2238        let mut analysed = 0usize;
2239        for table_name in &names {
2240            self.analyze_one_table(table_name)?;
2241            analysed += 1;
2242        }
2243        // v6.3.1 — plan cache invalidation. Bump stats version so
2244        // future lookups see the new generation, and selectively
2245        // evict every plan whose `source_tables` overlap with the
2246        // ANALYZE target set. Bare ANALYZE (all tables) clears the
2247        // whole cache.
2248        if analysed > 0 {
2249            self.statistics.bump_version();
2250            if target.is_some() {
2251                for t in &names {
2252                    self.plan_cache.evict_referencing(t);
2253                }
2254            } else {
2255                self.plan_cache.clear();
2256            }
2257        }
2258        Ok(QueryResult::CommandOk {
2259            affected: analysed,
2260            modified_catalog: true,
2261        })
2262    }
2263
2264    /// v6.7.3 — `COMPACT COLD SEGMENTS` runtime path. Drives the
2265    /// engine-layer compaction shim with the default
2266    /// 4 MiB segment-size threshold. spg-server intercepts the
2267    /// SQL before it reaches the engine on a server build —
2268    /// it reads `SPG_COMPACTION_TARGET_SEGMENT_BYTES`, calls
2269    /// `Engine::compact_cold_segments_with_target` directly with
2270    /// the env value, and persists every merged segment to
2271    /// v7.12.1 — record a `SET <name> = <value>` parameter. Names
2272    /// are case-folded to lowercase to match PG; values keep their
2273    /// caller-supplied form so observability paths see what was
2274    /// requested. Only `default_text_search_config` is consulted by
2275    /// the engine today.
2276    fn set_session_param(&mut self, name: String, value: spg_sql::ast::SetValue) {
2277        let normalised = match value {
2278            spg_sql::ast::SetValue::String(s) => s,
2279            spg_sql::ast::SetValue::Ident(s) => s,
2280            spg_sql::ast::SetValue::Number(s) => s,
2281            spg_sql::ast::SetValue::Default => String::new(),
2282        };
2283        let key = name.to_ascii_lowercase();
2284        // v7.14.0 — mysqldump preamble emits
2285        // `SET FOREIGN_KEY_CHECKS=0` so it can CREATE TABLE in any
2286        // order despite cross-table FK references; the closing
2287        // section emits `SET FOREIGN_KEY_CHECKS=1` (or
2288        // `=@OLD_FOREIGN_KEY_CHECKS` which resolves to "ON" in our
2289        // session-variable-aware path). Match both shapes.
2290        // Also accept PG's `session_replication_role = 'replica'`
2291        // which suppresses trigger + FK enforcement during a
2292        // logical replication apply (pg_dump preserves this for
2293        // schema-only mode but it shows up in some restores).
2294        let value_off = matches!(
2295            normalised.to_ascii_lowercase().as_str(),
2296            "0" | "off" | "false"
2297        );
2298        let value_on = matches!(
2299            normalised.to_ascii_lowercase().as_str(),
2300            "1" | "on" | "true"
2301        );
2302        if key == "foreign_key_checks"
2303            || key == "session_replication_role" && normalised.eq_ignore_ascii_case("replica")
2304        {
2305            if value_off || key == "session_replication_role" {
2306                self.foreign_key_checks = false;
2307            } else if value_on
2308                || (key == "session_replication_role"
2309                    && normalised.eq_ignore_ascii_case("origin"))
2310            {
2311                self.foreign_key_checks = true;
2312                // Drain pending FK queue against the now-complete
2313                // catalog. Errors here surface as the SET reply —
2314                // caller knows enabling checks revealed orphans.
2315                let _ = self.drain_pending_foreign_keys();
2316            }
2317        }
2318        self.session_params.insert(key, normalised);
2319    }
2320
2321    /// v7.14.0 — resolve every queued FK whose installation was
2322    /// deferred (`SET FOREIGN_KEY_CHECKS=0` window). Called by
2323    /// `set_session_param` when checks flip back on and by the
2324    /// drop-import release gate. Each FK is resolved against the
2325    /// current catalog; remaining missing-parent errors propagate
2326    /// up so the caller knows the import was incomplete.
2327    fn drain_pending_foreign_keys(&mut self) -> Result<(), EngineError> {
2328        let pending = core::mem::take(&mut self.pending_foreign_keys);
2329        for (child, fk) in pending {
2330            // Resolve against the current catalog. Skip silently
2331            // when the child table itself was dropped between
2332            // queue + drain.
2333            let cols_snapshot = match self.active_catalog().get(&child) {
2334                Some(t) => t.schema().columns.clone(),
2335                None => continue,
2336            };
2337            let storage_fk = resolve_foreign_key(&child, &cols_snapshot, fk, self.active_catalog())?;
2338            let table = self
2339                .active_catalog_mut()
2340                .get_mut(&child)
2341                .expect("checked above");
2342            table.schema_mut().foreign_keys.push(storage_fk);
2343        }
2344        Ok(())
2345    }
2346
2347    /// v7.12.1 — read a session parameter set via `SET`. Used by
2348    /// the FTS function dispatcher to resolve the default config
2349    /// for `to_tsvector(text)` / `plainto_tsquery(text)` etc.
2350    #[must_use]
2351    pub fn session_param(&self, name: &str) -> Option<&str> {
2352        self.session_params
2353            .get(&name.to_ascii_lowercase())
2354            .map(String::as_str)
2355    }
2356
2357    /// v7.12.1 — build an `EvalContext` chained with the session's
2358    /// `default_text_search_config`. Engine-internal callers use
2359    /// this instead of `EvalContext::new` so the FTS function
2360    /// dispatcher sees the SET configuration.
2361    fn ev_ctx<'a>(
2362        &'a self,
2363        columns: &'a [ColumnSchema],
2364        alias: Option<&'a str>,
2365    ) -> EvalContext<'a> {
2366        EvalContext::new(columns, alias)
2367            .with_default_text_search_config(self.session_param("default_text_search_config"))
2368    }
2369
2370    /// `<db>.spg/segments/`. This arm only fires for engine-only
2371    /// callers (spg-embedded, lib tests); in that mode merged
2372    /// segments live in memory and are dropped at process exit.
2373    fn exec_compact_cold_segments(&mut self) -> Result<QueryResult, EngineError> {
2374        let target = COMPACTION_TARGET_DEFAULT_BYTES;
2375        let reports = self.compact_cold_segments_with_target(target)?;
2376        let columns = alloc::vec![
2377            ColumnSchema::new("table_name", DataType::Text, false),
2378            ColumnSchema::new("index_name", DataType::Text, false),
2379            ColumnSchema::new("sources_merged", DataType::BigInt, false),
2380            ColumnSchema::new("merged_segment_id", DataType::BigInt, false),
2381            ColumnSchema::new("merged_rows", DataType::BigInt, false),
2382            ColumnSchema::new("deleted_rows_pruned", DataType::BigInt, false),
2383            ColumnSchema::new("bytes_reclaimed_estimate", DataType::BigInt, false),
2384        ];
2385        let rows: Vec<Row> = reports
2386            .into_iter()
2387            .map(|(tname, iname, report)| {
2388                Row::new(alloc::vec![
2389                    Value::Text(tname),
2390                    Value::Text(iname),
2391                    Value::BigInt(i64::try_from(report.sources.len()).unwrap_or(i64::MAX)),
2392                    Value::BigInt(i64::from(report.merged_segment_id.unwrap_or(0))),
2393                    Value::BigInt(i64::try_from(report.merged_rows).unwrap_or(i64::MAX)),
2394                    Value::BigInt(i64::try_from(report.deleted_rows_pruned).unwrap_or(i64::MAX),),
2395                    Value::BigInt(
2396                        i64::try_from(report.bytes_reclaimed_estimate).unwrap_or(i64::MAX),
2397                    ),
2398                ])
2399            })
2400            .collect();
2401        Ok(QueryResult::Rows { columns, rows })
2402    }
2403
2404    /// Walk a single table's rows once and (re-)populate per-column
2405    /// stats. Drops the existing stats for `table` first so columns
2406    /// that have been DROP-ed between ANALYZEs don't leave stale
2407    /// rows.
2408    fn analyze_one_table(&mut self, table_name: &str) -> Result<(), EngineError> {
2409        let table = self.catalog.get(table_name).ok_or_else(|| {
2410            EngineError::Storage(StorageError::TableNotFound {
2411                name: table_name.to_string(),
2412            })
2413        })?;
2414        let schema = table.schema().clone();
2415        let row_count = table.rows().len();
2416        // For each column, collect (sorted) non-NULL textual values
2417        // + count NULLs; then ask `statistics::build_histogram` to
2418        // produce the 101 bounds and `estimate_n_distinct` the
2419        // distinct count.
2420        self.statistics.clear_table(table_name);
2421        for (col_pos, col_schema) in schema.columns.iter().enumerate() {
2422            // v6.2.0 skip: vector columns have their own stats
2423            // shape (HNSW graph topology). v6.2 deliberation #1.
2424            if matches!(col_schema.ty, DataType::Vector { .. }) {
2425                continue;
2426            }
2427            let mut non_null_values: Vec<Value> = Vec::with_capacity(row_count);
2428            let mut nulls: u64 = 0;
2429            for row in table.rows() {
2430                match row.values.get(col_pos) {
2431                    Some(Value::Null) | None => nulls += 1,
2432                    Some(v) => non_null_values.push(v.clone()),
2433                }
2434            }
2435            // Sort by type-aware ordering (Int as int, Text as
2436            // lex, etc.) so histogram bounds reflect the column's
2437            // natural order — not lexicographic on the string
2438            // representation, which would put "9" after "49".
2439            non_null_values.sort_by(|a, b| sort_values_for_histogram(a, b));
2440            let non_null: Vec<String> = non_null_values.iter().map(canonical_value_repr).collect();
2441            let null_frac = if row_count == 0 {
2442                0.0
2443            } else {
2444                #[allow(clippy::cast_precision_loss)]
2445                let f = nulls as f32 / row_count as f32;
2446                f
2447            };
2448            let n_distinct = statistics::estimate_n_distinct(&non_null);
2449            let histogram_bounds = statistics::build_histogram(&non_null);
2450            self.statistics.set(
2451                table_name.to_string(),
2452                col_schema.name.clone(),
2453                statistics::ColumnStats {
2454                    null_frac,
2455                    n_distinct,
2456                    histogram_bounds,
2457                },
2458            );
2459        }
2460        self.statistics.reset_modified(table_name);
2461        // v6.7.0 — refresh the per-table cold_rows cache. Walk the
2462        // BTree indices and count Cold locators (MAX across
2463        // indices); store the result on the table. Surfaced via
2464        // `spg_statistic.cold_row_count` (new column) and
2465        // `spg_stat_segment.table_name` (new column).
2466        let cold_count = {
2467            let table = self
2468                .active_catalog()
2469                .get(table_name)
2470                .expect("table still present");
2471            table.count_cold_locators()
2472        };
2473        let table_mut = self
2474            .active_catalog_mut()
2475            .get_mut(table_name)
2476            .expect("table still present");
2477        table_mut.set_cold_row_count(cold_count);
2478        Ok(())
2479    }
2480
2481    /// v6.1.3 — `SHOW PUBLICATIONS` row materialisation. Returns
2482    /// `(name, scope, table_count)` ordered by publication name.
2483    ///   - `scope` is the human-readable string:
2484    ///       `"FOR ALL TABLES"` /
2485    ///       `"FOR TABLE t1, t2"` /
2486    ///       `"FOR ALL TABLES EXCEPT t1, t2"`.
2487    ///   - `table_count` is NULL for `AllTables`, the list length
2488    ///     otherwise. NULLability lets clients distinguish "publish
2489    ///     everything" from "publish exactly 0 tables" (the v6.1.3
2490    ///     parser forbids the empty list, but the column shape is
2491    ///     ready for the v6.1.5 publisher-side semantics).
2492    fn exec_show_publications(&self) -> QueryResult {
2493        let columns = alloc::vec![
2494            ColumnSchema::new("name", DataType::Text, false),
2495            ColumnSchema::new("scope", DataType::Text, false),
2496            ColumnSchema::new("table_count", DataType::Int, true),
2497        ];
2498        let rows: Vec<Row> = self
2499            .publications
2500            .iter()
2501            .map(|(name, scope)| {
2502                let (scope_str, count_val) = match scope {
2503                    spg_sql::ast::PublicationScope::AllTables => {
2504                        ("FOR ALL TABLES".to_string(), Value::Null)
2505                    }
2506                    spg_sql::ast::PublicationScope::ForTables(ts) => (
2507                        alloc::format!("FOR TABLE {}", ts.join(", ")),
2508                        Value::Int(i32::try_from(ts.len()).unwrap_or(i32::MAX)),
2509                    ),
2510                    spg_sql::ast::PublicationScope::AllTablesExcept(ts) => (
2511                        alloc::format!("FOR ALL TABLES EXCEPT {}", ts.join(", ")),
2512                        Value::Int(i32::try_from(ts.len()).unwrap_or(i32::MAX)),
2513                    ),
2514                };
2515                Row::new(alloc::vec![
2516                    Value::Text(name.clone()),
2517                    Value::Text(scope_str),
2518                    count_val,
2519                ])
2520            })
2521            .collect();
2522        QueryResult::Rows { columns, rows }
2523    }
2524
2525    /// v4.1 `SHOW USERS` — `(name, role)` per row, ordered by name.
2526    fn exec_show_users(&self) -> QueryResult {
2527        let columns = alloc::vec![
2528            ColumnSchema::new("name", DataType::Text, false),
2529            ColumnSchema::new("role", DataType::Text, false),
2530        ];
2531        let rows: Vec<Row> = self
2532            .users
2533            .iter()
2534            .map(|(name, rec)| {
2535                Row::new(alloc::vec![
2536                    Value::Text(name.to_string()),
2537                    Value::Text(rec.role.as_str().to_string()),
2538                ])
2539            })
2540            .collect();
2541        QueryResult::Rows { columns, rows }
2542    }
2543
2544    fn exec_create_user(&mut self, s: &CreateUserStatement) -> Result<QueryResult, EngineError> {
2545        if self.in_transaction() {
2546            return Err(EngineError::Unsupported(
2547                "CREATE USER is not allowed inside a transaction".into(),
2548            ));
2549        }
2550        let role = users::Role::parse(&s.role).ok_or_else(|| {
2551            EngineError::Unsupported(alloc::format!("invalid role: {:?}", s.role))
2552        })?;
2553        // Prefer the host-injected RNG. Falls back to a deterministic
2554        // salt derived from the username only when no RNG is wired —
2555        // acceptable for tests; the server always installs one.
2556        let salt = self.salt_fn.map_or_else(
2557            || {
2558                let mut s_bytes = [0u8; 16];
2559                let digest = spg_crypto::hash(s.name.as_bytes());
2560                s_bytes.copy_from_slice(&digest[..16]);
2561                s_bytes
2562            },
2563            |f| f(),
2564        );
2565        self.users
2566            .create(&s.name, &s.password, role, salt)
2567            .map_err(|e| EngineError::Unsupported(alloc::format!("CREATE USER: {e}")))?;
2568        Ok(QueryResult::CommandOk {
2569            affected: 1,
2570            modified_catalog: true,
2571        })
2572    }
2573
2574    fn exec_drop_user(&mut self, name: &str) -> Result<QueryResult, EngineError> {
2575        if self.in_transaction() {
2576            return Err(EngineError::Unsupported(
2577                "DROP USER is not allowed inside a transaction".into(),
2578            ));
2579        }
2580        self.users
2581            .drop(name)
2582            .map_err(|e| EngineError::Unsupported(alloc::format!("DROP USER: {e}")))?;
2583        Ok(QueryResult::CommandOk {
2584            affected: 1,
2585            modified_catalog: true,
2586        })
2587    }
2588
2589    /// v7.12.4 — `CREATE [OR REPLACE] FUNCTION`. Stores the
2590    /// function metadata in the catalog. PL/pgSQL bodies are
2591    /// already parsed by the SQL parser; we re-canonicalise the
2592    /// body to source text for storage (the executor re-parses
2593    /// it at trigger fire time — see the trigger fire path).
2594    fn exec_create_function(
2595        &mut self,
2596        s: spg_sql::ast::CreateFunctionStatement,
2597    ) -> Result<QueryResult, EngineError> {
2598        let args_repr = render_function_args(&s.args);
2599        let returns = match &s.returns {
2600            spg_sql::ast::FunctionReturn::Trigger => alloc::string::String::from("TRIGGER"),
2601            spg_sql::ast::FunctionReturn::Void => alloc::string::String::from("VOID"),
2602            spg_sql::ast::FunctionReturn::Type(t) => alloc::format!("{t}"),
2603            spg_sql::ast::FunctionReturn::Other(s) => s.clone(),
2604        };
2605        let body_text = match &s.body {
2606            spg_sql::ast::FunctionBody::PlPgSql(b) => alloc::format!("{b}"),
2607            spg_sql::ast::FunctionBody::Raw(s) => s.clone(),
2608        };
2609        let def = spg_storage::FunctionDef {
2610            name: s.name.clone(),
2611            args_repr,
2612            returns,
2613            language: s.language.clone(),
2614            body: body_text,
2615        };
2616        self.active_catalog_mut()
2617            .create_function(def, s.or_replace)
2618            .map_err(EngineError::Storage)?;
2619        Ok(QueryResult::CommandOk {
2620            affected: 0,
2621            modified_catalog: true,
2622        })
2623    }
2624
2625    /// v7.12.4 — `CREATE [OR REPLACE] TRIGGER`. The referenced
2626    /// function must already exist in the catalog (forward
2627    /// references defer to a later release). Persists the
2628    /// trigger metadata for the row-write hooks below to consult.
2629    fn exec_create_trigger(
2630        &mut self,
2631        s: spg_sql::ast::CreateTriggerStatement,
2632    ) -> Result<QueryResult, EngineError> {
2633        let timing = match s.timing {
2634            spg_sql::ast::TriggerTiming::Before => "BEFORE",
2635            spg_sql::ast::TriggerTiming::After => "AFTER",
2636            spg_sql::ast::TriggerTiming::InsteadOf => "INSTEAD OF",
2637        };
2638        let events: Vec<alloc::string::String> = s
2639            .events
2640            .iter()
2641            .map(|e| match e {
2642                spg_sql::ast::TriggerEvent::Insert => alloc::string::String::from("INSERT"),
2643                spg_sql::ast::TriggerEvent::Update => alloc::string::String::from("UPDATE"),
2644                spg_sql::ast::TriggerEvent::Delete => alloc::string::String::from("DELETE"),
2645                spg_sql::ast::TriggerEvent::Truncate => alloc::string::String::from("TRUNCATE"),
2646            })
2647            .collect();
2648        let for_each = match s.for_each {
2649            spg_sql::ast::TriggerForEach::Row => "ROW",
2650            spg_sql::ast::TriggerForEach::Statement => "STATEMENT",
2651        };
2652        let def = spg_storage::TriggerDef {
2653            name: s.name.clone(),
2654            table: s.table.clone(),
2655            timing: alloc::string::String::from(timing),
2656            events,
2657            for_each: alloc::string::String::from(for_each),
2658            function: s.function.clone(),
2659            update_columns: s.update_columns.clone(),
2660        };
2661        self.active_catalog_mut()
2662            .create_trigger(def, s.or_replace)
2663            .map_err(EngineError::Storage)?;
2664        Ok(QueryResult::CommandOk {
2665            affected: 0,
2666            modified_catalog: true,
2667        })
2668    }
2669
2670    fn exec_drop_trigger(
2671        &mut self,
2672        name: &str,
2673        table: &str,
2674        if_exists: bool,
2675    ) -> Result<QueryResult, EngineError> {
2676        let removed = self.active_catalog_mut().drop_trigger(name, table);
2677        if !removed && !if_exists {
2678            return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
2679                alloc::format!("trigger {name:?} on {table:?} does not exist"),
2680            )));
2681        }
2682        Ok(QueryResult::CommandOk {
2683            affected: usize::from(removed),
2684            modified_catalog: removed,
2685        })
2686    }
2687
2688    fn exec_drop_function(
2689        &mut self,
2690        name: &str,
2691        if_exists: bool,
2692    ) -> Result<QueryResult, EngineError> {
2693        let removed = self.active_catalog_mut().drop_function(name);
2694        if !removed && !if_exists {
2695            return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
2696                alloc::format!("function {name:?} does not exist"),
2697            )));
2698        }
2699        Ok(QueryResult::CommandOk {
2700            affected: usize::from(removed),
2701            modified_catalog: removed,
2702        })
2703    }
2704
2705    /// v4.4 `UPDATE <table> SET col = expr [, ...] [WHERE cond]`.
2706    /// Filter pass uses the same WHERE eval as `exec_select`. Per
2707    /// matched row, evaluate each RHS expression against the *old*
2708    /// row, then call `Table::update_row` which rebuilds indices.
2709    /// Indexed columns are correctly reflected because rebuild
2710    /// happens after the cell rewrite.
2711    fn exec_update_cancel(
2712        &mut self,
2713        stmt: &spg_sql::ast::UpdateStatement,
2714        cancel: CancelToken<'_>,
2715    ) -> Result<QueryResult, EngineError> {
2716        // v7.12.5 — snapshot BEFORE/AFTER UPDATE row triggers + the
2717        // session FTS config before the table mut-borrow opens (the
2718        // INSERT path uses the same pattern). Empty vecs are the
2719        // common "no triggers on this table" fast path.
2720        // v7.13.0 — UPDATE triggers carry an optional `UPDATE OF
2721        // cols` filter. The filter is paired with each function so
2722        // the per-row fire loop can skip when no listed column
2723        // actually differs between OLD and NEW.
2724        let before_update_triggers = self.snapshot_update_row_triggers(&stmt.table, "BEFORE");
2725        let after_update_triggers = self.snapshot_update_row_triggers(&stmt.table, "AFTER");
2726        let trigger_session_cfg: Option<String> = self
2727            .session_params
2728            .get("default_text_search_config")
2729            .cloned();
2730        // v5.2.3: if the WHERE is a PK equality and matches a cold-
2731        // tier row, promote it back to the hot tier *before* the
2732        // hot-row walk. The promote pushes the row to the end of
2733        // `table.rows`, where the upcoming SET-evaluation loop will
2734        // pick it up and apply the assignments. Lookups for the key
2735        // never observe a gap because `promote_cold_row` inserts the
2736        // hot row before retiring the cold locator.
2737        if let Some(w) = &stmt.where_ {
2738            let schema_cols = self
2739                .active_catalog()
2740                .get(&stmt.table)
2741                .ok_or_else(|| {
2742                    EngineError::Storage(StorageError::TableNotFound {
2743                        name: stmt.table.clone(),
2744                    })
2745                })?
2746                .schema()
2747                .columns
2748                .clone();
2749            if let Some((col_pos, key)) = try_pk_predicate(w, &schema_cols, stmt.table.as_str())
2750                && let Some(idx_name) = self
2751                    .active_catalog()
2752                    .get(&stmt.table)
2753                    .and_then(|t| t.index_on(col_pos).map(|i| i.name.clone()))
2754            {
2755                // Promote may be a no-op (key is hot-only or absent);
2756                // we don't care about the return value here — the
2757                // subsequent hot walk will either match or not.
2758                let _ = self
2759                    .active_catalog_mut()
2760                    .promote_cold_row(&stmt.table, &idx_name, &key);
2761            }
2762        }
2763
2764        // v7.12.1 — cache session FTS config before the table
2765        // mut-borrow (same reason as exec_delete).
2766        let ts_cfg: Option<String> = self
2767            .session_param("default_text_search_config")
2768            .map(String::from);
2769        let table = self
2770            .active_catalog_mut()
2771            .get_mut(&stmt.table)
2772            .ok_or_else(|| {
2773                EngineError::Storage(StorageError::TableNotFound {
2774                    name: stmt.table.clone(),
2775                })
2776            })?;
2777        let schema_cols: Vec<ColumnSchema> = table.schema().columns.clone();
2778        // Resolve each SET target to a column position once, validate
2779        // up front so a typo'd column doesn't leave a partial mutation
2780        // behind.
2781        let mut targets: Vec<(usize, &Expr)> = Vec::with_capacity(stmt.assignments.len());
2782        for (col, expr) in &stmt.assignments {
2783            let pos = schema_cols
2784                .iter()
2785                .position(|c| c.name == *col)
2786                .ok_or_else(|| {
2787                    EngineError::Eval(EvalError::ColumnNotFound { name: col.clone() })
2788                })?;
2789            targets.push((pos, expr));
2790        }
2791        let ctx = EvalContext::new(&schema_cols, Some(stmt.table.as_str()))
2792            .with_default_text_search_config(ts_cfg.as_deref());
2793        // Walk every row, evaluate WHERE then SET expressions. We
2794        // gather (position, new_values) tuples first and apply them
2795        // afterwards so the WHERE/RHS evaluation reads the original
2796        // row state — matches PG semantics (UPDATE doesn't see its
2797        // own writes).
2798        let mut planned: Vec<(usize, Vec<Value>)> = Vec::new();
2799        for (i, row) in table.rows().iter().enumerate() {
2800            // v4.5: cooperative cancel checkpoint every 256 rows so
2801            // a runaway UPDATE without WHERE doesn't drag past the
2802            // server's query-timeout watchdog.
2803            if i.is_multiple_of(256) {
2804                cancel.check()?;
2805            }
2806            if let Some(w) = &stmt.where_ {
2807                let cond = eval::eval_expr(w, row, &ctx)?;
2808                if !matches!(cond, Value::Bool(true)) {
2809                    continue;
2810                }
2811            }
2812            let mut new_vals = row.values.clone();
2813            for (pos, expr) in &targets {
2814                let v = eval::eval_expr(expr, row, &ctx)?;
2815                new_vals[*pos] =
2816                    coerce_value(v, schema_cols[*pos].ty, &schema_cols[*pos].name, *pos)?;
2817            }
2818            planned.push((i, new_vals));
2819        }
2820        // v7.6.6 — capture pre-update row values for the FK
2821        // enforcement passes below. `planned` carries new values
2822        // only; pair them with the old row.
2823        let plan_with_old: Vec<(usize, Vec<Value>, Vec<Value>)> = planned
2824            .iter()
2825            .map(|(pos, new_vals)| (*pos, table.rows()[*pos].values.clone(), new_vals.clone()))
2826            .collect();
2827        let self_fks = table.schema().foreign_keys.clone();
2828        // v7.12.5 — `affected` is computed post-BEFORE-trigger
2829        // below (triggers may RETURN NULL to skip individual
2830        // rows). The pre-trigger len shape is no longer accurate.
2831        // Release mutable borrow on `table` for the FK passes.
2832        let _ = table;
2833        // v7.6.6 — Stage 2a: outbound FK check. For every row whose
2834        // local FK columns changed, the new value must exist in the
2835        // parent.
2836        if !self_fks.is_empty() {
2837            let new_rows: Vec<Vec<Value>> = planned
2838                .iter()
2839                .map(|(_pos, new_vals)| new_vals.clone())
2840                .collect();
2841            enforce_fk_inserts(self.active_catalog(), &stmt.table, &self_fks, &new_rows)?;
2842        }
2843        // v7.13.0 — CHECK constraint enforcement on UPDATE
2844        // (mailrs round-5 G3). Predicates evaluated against the
2845        // candidate post-UPDATE row; false rejects the UPDATE.
2846        {
2847            let new_rows: Vec<Vec<Value>> = planned
2848                .iter()
2849                .map(|(_pos, new_vals)| new_vals.clone())
2850                .collect();
2851            enforce_check_constraints(self.active_catalog(), &stmt.table, &new_rows)?;
2852        }
2853        // v7.6.6 — Stage 2b: inbound FK check. For every row that
2854        // changed value in a column that *some other table* uses as
2855        // a FK parent column, react per `on_update` action.
2856        let child_plan =
2857            plan_fk_parent_updates(self.active_catalog(), &stmt.table, &plan_with_old)?;
2858        // Stage 3a — apply each child-side action.
2859        for step in &child_plan {
2860            apply_fk_child_step(self.active_catalog_mut(), step)?;
2861        }
2862        // Stage 3b — apply the original UPDATE.
2863        let table = self
2864            .active_catalog_mut()
2865            .get_mut(&stmt.table)
2866            .ok_or_else(|| {
2867                EngineError::Storage(StorageError::TableNotFound {
2868                    name: stmt.table.clone(),
2869                })
2870            })?;
2871        // v7.12.5 — fire BEFORE/AFTER UPDATE row-level triggers
2872        // around the apply loop. BEFORE sees NEW=candidate +
2873        // OLD=current; may rewrite NEW or RETURN NULL to skip.
2874        // AFTER sees NEW=post-write + OLD=pre-write (both read-
2875        // only).
2876        //
2877        // Filter `planned` through the BEFORE pass first so the
2878        // RETURNING snapshot reflects what actually got written
2879        // (triggers may rewrite cells, including a cancellation).
2880        let mut applied_after_before: Vec<(usize, Row, Row)> = Vec::with_capacity(planned.len());
2881        // v7.12.7 — embedded SQL queue.
2882        let mut deferred_embedded: Vec<triggers::DeferredEmbeddedStmt> = Vec::new();
2883        for (pos, new_vals) in &planned {
2884            let old_row = table.rows()[*pos].clone();
2885            let mut new_row = Row::new(new_vals.clone());
2886            let mut skip = false;
2887            for (fd, filter) in &before_update_triggers {
2888                // v7.13.0 — `UPDATE OF cols` filter (mailrs round-5
2889                // G7). Skip this trigger when the filter is set and
2890                // no listed column actually differs between OLD and
2891                // NEW for this row.
2892                if !filter.is_empty()
2893                    && !any_column_changed(filter, &schema_cols, &old_row, &new_row)
2894                {
2895                    continue;
2896                }
2897                let (outcome, deferred) = triggers::fire_row_trigger(
2898                    fd,
2899                    Some(new_row.clone()),
2900                    Some(&old_row),
2901                    &stmt.table,
2902                    &schema_cols,
2903                    &[],
2904                    trigger_session_cfg.as_deref(),
2905                    false,
2906                )
2907                .map_err(|e| EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}"))))?;
2908                deferred_embedded.extend(deferred);
2909                match outcome {
2910                    triggers::TriggerOutcome::Row(r) => new_row = r,
2911                    triggers::TriggerOutcome::Skip => {
2912                        skip = true;
2913                        break;
2914                    }
2915                }
2916            }
2917            if !skip {
2918                applied_after_before.push((*pos, new_row, old_row));
2919            }
2920        }
2921        // v7.9.4 — snapshot post-update values for RETURNING (post-
2922        // BEFORE-trigger because triggers can rewrite cells).
2923        let updated_for_returning: Vec<Vec<Value>> = if stmt.returning.is_some() {
2924            applied_after_before
2925                .iter()
2926                .map(|(_pos, new_row, _old)| new_row.values.clone())
2927                .collect()
2928        } else {
2929            Vec::new()
2930        };
2931        let affected = applied_after_before.len();
2932        // Apply, then fire AFTER triggers per row. AFTER runs read-
2933        // only against the freshly-written row; v7.12.4-shape
2934        // assignment errors with a clear message.
2935        for (pos, new_row, old_row) in applied_after_before {
2936            table.update_row(pos, new_row.values.clone())?;
2937            for (fd, filter) in &after_update_triggers {
2938                if !filter.is_empty()
2939                    && !any_column_changed(filter, &schema_cols, &old_row, &new_row)
2940                {
2941                    continue;
2942                }
2943                let (_outcome, deferred) = triggers::fire_row_trigger(
2944                    fd,
2945                    Some(new_row.clone()),
2946                    Some(&old_row),
2947                    &stmt.table,
2948                    &schema_cols,
2949                    &[],
2950                    trigger_session_cfg.as_deref(),
2951                    true,
2952                )
2953                .map_err(|e| EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}"))))?;
2954                deferred_embedded.extend(deferred);
2955            }
2956        }
2957        let _ = table;
2958        // v7.12.7 — drain trigger-emitted embedded SQL for this UPDATE.
2959        self.execute_deferred_trigger_stmts(deferred_embedded, cancel)?;
2960        // v6.2.1 — auto-analyze modified-row tracking for UPDATE.
2961        if !self.in_transaction() && affected > 0 {
2962            self.statistics
2963                .record_modifications(&stmt.table, affected as u64);
2964        }
2965        // v7.9.4 — RETURNING projection.
2966        if let Some(items) = &stmt.returning {
2967            return self.build_returning_rows(&stmt.table, items, updated_for_returning);
2968        }
2969        Ok(QueryResult::CommandOk {
2970            affected,
2971            modified_catalog: !self.in_transaction(),
2972        })
2973    }
2974
2975    /// v4.4 `DELETE FROM <table> [WHERE cond]`. Collects matching
2976    /// positions then delegates to `Table::delete_rows` (single index
2977    /// rebuild for the batch).
2978    fn exec_delete_cancel(
2979        &mut self,
2980        stmt: &spg_sql::ast::DeleteStatement,
2981        cancel: CancelToken<'_>,
2982    ) -> Result<QueryResult, EngineError> {
2983        // v7.12.5 — snapshot BEFORE/AFTER DELETE row triggers + the
2984        // session FTS config before the mut borrow (same shape as
2985        // INSERT / UPDATE).
2986        let before_delete_triggers = self.snapshot_row_triggers(&stmt.table, "DELETE", "BEFORE");
2987        let after_delete_triggers = self.snapshot_row_triggers(&stmt.table, "DELETE", "AFTER");
2988        let trigger_session_cfg: Option<String> = self
2989            .session_params
2990            .get("default_text_search_config")
2991            .cloned();
2992        // v5.2.3: PK-targeted DELETE → first retire any cold-tier
2993        // locator for the key. The cold row body stays in the
2994        // segment (becoming shadowed garbage that a future
2995        // compaction pass reclaims) but the index no longer
2996        // resolves it. The shadow count contributes to the
2997        // affected total; the subsequent hot walk handles any hot
2998        // rows for the same key.
2999        let mut cold_shadow_count: usize = 0;
3000        if let Some(w) = &stmt.where_ {
3001            let schema_cols = self
3002                .active_catalog()
3003                .get(&stmt.table)
3004                .ok_or_else(|| {
3005                    EngineError::Storage(StorageError::TableNotFound {
3006                        name: stmt.table.clone(),
3007                    })
3008                })?
3009                .schema()
3010                .columns
3011                .clone();
3012            if let Some((col_pos, key)) = try_pk_predicate(w, &schema_cols, stmt.table.as_str())
3013                && let Some(idx_name) = self
3014                    .active_catalog()
3015                    .get(&stmt.table)
3016                    .and_then(|t| t.index_on(col_pos).map(|i| i.name.clone()))
3017            {
3018                cold_shadow_count = self
3019                    .active_catalog_mut()
3020                    .shadow_cold_row(&stmt.table, &idx_name, &key)
3021                    .unwrap_or(0);
3022            }
3023        }
3024
3025        // v7.12.1 — cache the session FTS config as an owned
3026        // String before the mutable table borrow below; the
3027        // ctx-builder then references it via `as_deref` so the
3028        // immutable read of `session_params` doesn't conflict
3029        // with the mut borrow chain.
3030        let ts_cfg: Option<String> = self
3031            .session_param("default_text_search_config")
3032            .map(String::from);
3033        let table = self
3034            .active_catalog_mut()
3035            .get_mut(&stmt.table)
3036            .ok_or_else(|| {
3037                EngineError::Storage(StorageError::TableNotFound {
3038                    name: stmt.table.clone(),
3039                })
3040            })?;
3041        let schema_cols: Vec<ColumnSchema> = table.schema().columns.clone();
3042        let ctx = EvalContext::new(&schema_cols, Some(stmt.table.as_str()))
3043            .with_default_text_search_config(ts_cfg.as_deref());
3044        let mut positions: Vec<usize> = Vec::new();
3045        // v7.6.3 — collect every to-delete row's full Value tuple
3046        // alongside its position, so the FK enforcement pass can
3047        // run after the mut borrow drops.
3048        let mut to_delete_rows: Vec<Vec<Value>> = Vec::new();
3049        for (i, row) in table.rows().iter().enumerate() {
3050            if i.is_multiple_of(256) {
3051                cancel.check()?;
3052            }
3053            let keep = if let Some(w) = &stmt.where_ {
3054                let cond = eval::eval_expr(w, row, &ctx)?;
3055                !matches!(cond, Value::Bool(true))
3056            } else {
3057                false
3058            };
3059            if !keep {
3060                positions.push(i);
3061                to_delete_rows.push(row.values.clone());
3062            }
3063        }
3064        // v7.6.3 / v7.6.4 — Stage 2: FK enforcement on the immutable
3065        // catalog. Release the mut borrow and run reverse-scan
3066        // against every child table whose FK targets this table.
3067        // RESTRICT / NoAction raise an error; CASCADE returns a
3068        // cascade plan that stage 3 applies after the primary delete.
3069        // SET NULL / SET DEFAULT remain Unsupported until v7.6.5.
3070        let _ = table;
3071        // v7.12.5 — BEFORE DELETE row-level triggers. Each fires
3072        // with NEW=None / OLD=pre-delete row; RETURN OLD (or NEW)
3073        // = proceed, RETURN NULL = skip the row entirely. The
3074        // filter must run BEFORE the FK cascade plan so cascaded
3075        // child rows track the trigger's skip-decision on the
3076        // parent.
3077        // v7.12.7 — embedded SQL queue.
3078        let mut deferred_embedded: Vec<triggers::DeferredEmbeddedStmt> = Vec::new();
3079        if !before_delete_triggers.is_empty() {
3080            let mut filtered_positions: Vec<usize> = Vec::with_capacity(positions.len());
3081            let mut filtered_old_rows: Vec<Vec<Value>> = Vec::with_capacity(to_delete_rows.len());
3082            for (pos, old_vals) in positions.iter().zip(to_delete_rows.iter()) {
3083                let old_row = Row::new(old_vals.clone());
3084                let mut cancel_this = false;
3085                for fd in &before_delete_triggers {
3086                    let (outcome, deferred) = triggers::fire_row_trigger(
3087                        fd,
3088                        None,
3089                        Some(&old_row),
3090                        &stmt.table,
3091                        &schema_cols,
3092                        &[],
3093                        trigger_session_cfg.as_deref(),
3094                        false,
3095                    )
3096                    .map_err(|e| {
3097                        EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}")))
3098                    })?;
3099                    deferred_embedded.extend(deferred);
3100                    if matches!(outcome, triggers::TriggerOutcome::Skip) {
3101                        cancel_this = true;
3102                        break;
3103                    }
3104                }
3105                if !cancel_this {
3106                    filtered_positions.push(*pos);
3107                    filtered_old_rows.push(old_vals.clone());
3108                }
3109            }
3110            positions = filtered_positions;
3111            to_delete_rows = filtered_old_rows;
3112        }
3113        let cascade_plan = plan_fk_parent_deletions(
3114            self.active_catalog(),
3115            &stmt.table,
3116            &positions,
3117            &to_delete_rows,
3118        )?;
3119        // Stage 3a — apply each FK child step (SET NULL / SET
3120        // DEFAULT / CASCADE delete) before deleting the parent.
3121        // The plan is already ordered: nulls/defaults first, then
3122        // cascade deletes (so a row mutated and later deleted
3123        // surfaces as deleted — though v7.6.5 doesn't produce
3124        // that overlap today).
3125        for step in &cascade_plan {
3126            apply_fk_child_step(self.active_catalog_mut(), step)?;
3127        }
3128        // Stage 3b — actually delete the original target rows.
3129        let table = self
3130            .active_catalog_mut()
3131            .get_mut(&stmt.table)
3132            .ok_or_else(|| {
3133                EngineError::Storage(StorageError::TableNotFound {
3134                    name: stmt.table.clone(),
3135                })
3136            })?;
3137        let affected = table.delete_rows(&positions) + cold_shadow_count;
3138        let _ = table;
3139        // v7.12.5 — AFTER DELETE row-level triggers fire post-write
3140        // with NEW=None / OLD=pre-delete row (each from the
3141        // already-snapshotted to_delete_rows). Return value is
3142        // ignored (matches PG AFTER semantics).
3143        if !after_delete_triggers.is_empty() {
3144            for old_vals in &to_delete_rows {
3145                let old_row = Row::new(old_vals.clone());
3146                for fd in &after_delete_triggers {
3147                    let (_outcome, deferred) = triggers::fire_row_trigger(
3148                        fd,
3149                        None,
3150                        Some(&old_row),
3151                        &stmt.table,
3152                        &schema_cols,
3153                        &[],
3154                        trigger_session_cfg.as_deref(),
3155                        true,
3156                    )
3157                    .map_err(|e| {
3158                        EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}")))
3159                    })?;
3160                    deferred_embedded.extend(deferred);
3161                }
3162            }
3163        }
3164        // v7.12.7 — drain trigger-emitted embedded SQL for this DELETE.
3165        self.execute_deferred_trigger_stmts(deferred_embedded, cancel)?;
3166        // v6.2.1 — auto-analyze modified-row tracking for DELETE.
3167        if !self.in_transaction() && affected > 0 {
3168            self.statistics
3169                .record_modifications(&stmt.table, affected as u64);
3170        }
3171        // v7.9.4 — RETURNING projection over the soon-to-be-gone
3172        // rows. `to_delete_rows` was snapshotted in stage 1 before
3173        // mutation, so the projection sees the pre-delete state
3174        // (matches PG semantics: DELETE RETURNING returns the row
3175        // as it was just before removal).
3176        if let Some(items) = &stmt.returning {
3177            return self.build_returning_rows(&stmt.table, items, to_delete_rows);
3178        }
3179        Ok(QueryResult::CommandOk {
3180            affected,
3181            modified_catalog: !self.in_transaction(),
3182        })
3183    }
3184
3185    /// `SHOW TABLES` — one row per table in the active catalog.
3186    /// Column name is `name` so result-set consumers can downstream
3187    /// `SELECT name FROM ...` style logic if needed.
3188    /// v4.26: `EXPLAIN [ANALYZE] <select>`. Returns a single-column
3189    /// `QUERY PLAN` text table — first line names the top operator
3190    /// (Scan / Aggregate / Window / etc.), indented children list
3191    /// FROM joins, WHERE filters, ORDER BY / LIMIT, projection
3192    /// shape, and any active index hits. `ANALYZE` execs the inner
3193    /// SELECT and appends actual-row + elapsed-micros annotations.
3194    #[allow(clippy::format_push_string)]
3195    fn exec_explain(
3196        &self,
3197        e: &spg_sql::ast::ExplainStatement,
3198        cancel: CancelToken<'_>,
3199    ) -> Result<QueryResult, EngineError> {
3200        let mut lines = Vec::<String>::new();
3201        explain_select(&e.inner, self, 0, &mut lines);
3202        if e.suggest {
3203            // v6.8.3 — index advisor. Walks the SELECT's FROM
3204            // tables + WHERE column refs; for each (table, column)
3205            // pair that lacks an index, append a SUGGEST line with
3206            // a copy-pastable `CREATE INDEX` statement. This is a
3207            // pure-syntax heuristic — no cardinality estimation —
3208            // matching the v6.8.3 design intent of "tell the
3209            // operator where indexes are missing", not "give the
3210            // mathematically optimal index set".
3211            let suggestions = build_index_suggestions(&e.inner, self);
3212            for s in suggestions {
3213                lines.push(s);
3214            }
3215        } else if e.analyze {
3216            // v6.2.4 — EXPLAIN ANALYZE annotates each operator line
3217            // with `(rows=N)` where the row count is computable
3218            // without re-executing the full query:
3219            //   - Top-level operator (first non-indented line):
3220            //     rows = final result.len()
3221            //   - "From: <table> [full scan]" lines: rows =
3222            //     table.rows().len() (catalog read; no execution)
3223            //   - "From: <table> [index seek]": indeterminate —
3224            //     the index step would need re-execution; v6.2.5
3225            //     adds per-operator wall-clock + hot/cold rows
3226            //     instrumentation that makes this concrete.
3227            //   - Everything else: marked `(—)` so the surface
3228            //     stays well-defined without silently dropping
3229            //     stats. v6.2.5 fills in via inline executor
3230            //     instrumentation.
3231            // Total elapsed lands on a trailing `Total: …` line.
3232            let started = self.clock.map(|f| f());
3233            let exec = self.exec_select_cancel(&e.inner, cancel)?;
3234            let elapsed_micros = match (self.clock, started) {
3235                (Some(f), Some(s)) => Some(f().saturating_sub(s)),
3236                _ => None,
3237            };
3238            let row_count = if let QueryResult::Rows { rows, .. } = &exec {
3239                rows.len()
3240            } else {
3241                0
3242            };
3243            annotate_explain_lines(&mut lines, row_count, self);
3244            let mut total = alloc::format!("Total: rows={row_count}");
3245            if let Some(us) = elapsed_micros {
3246                total.push_str(&alloc::format!(" elapsed={us}us"));
3247            }
3248            lines.push(total);
3249        }
3250        let columns = alloc::vec![ColumnSchema::new("QUERY PLAN", DataType::Text, false)];
3251        let rows: Vec<Row> = lines
3252            .into_iter()
3253            .map(|l| Row::new(alloc::vec![Value::Text(l)]))
3254            .collect();
3255        Ok(QueryResult::Rows { columns, rows })
3256    }
3257
3258    fn exec_show_tables(&self) -> QueryResult {
3259        let columns = alloc::vec![ColumnSchema::new("name", DataType::Text, false)];
3260        let rows: Vec<Row> = self
3261            .active_catalog()
3262            .table_names()
3263            .into_iter()
3264            .map(|n| Row::new(alloc::vec![Value::Text(n)]))
3265            .collect();
3266        QueryResult::Rows { columns, rows }
3267    }
3268
3269    /// `SHOW COLUMNS FROM <table>` — one row per column with the
3270    /// declared name, SQL type rendering, and nullability flag.
3271    fn exec_show_columns(&self, table_name: &str) -> Result<QueryResult, EngineError> {
3272        let table =
3273            self.active_catalog()
3274                .get(table_name)
3275                .ok_or_else(|| StorageError::TableNotFound {
3276                    name: table_name.into(),
3277                })?;
3278        let columns = alloc::vec![
3279            ColumnSchema::new("name", DataType::Text, false),
3280            ColumnSchema::new("type", DataType::Text, false),
3281            ColumnSchema::new("nullable", DataType::Bool, false),
3282        ];
3283        let rows: Vec<Row> = table
3284            .schema()
3285            .columns
3286            .iter()
3287            .map(|c| {
3288                Row::new(alloc::vec![
3289                    Value::Text(c.name.clone()),
3290                    Value::Text(alloc::format!("{}", c.ty)),
3291                    Value::Bool(c.nullable),
3292                ])
3293            })
3294            .collect();
3295        Ok(QueryResult::Rows { columns, rows })
3296    }
3297
3298    fn exec_begin(&mut self) -> Result<QueryResult, EngineError> {
3299        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
3300        if self.tx_catalogs.contains_key(&tx_id) {
3301            return Err(EngineError::TransactionAlreadyOpen);
3302        }
3303        self.tx_catalogs.insert(
3304            tx_id,
3305            TxState {
3306                catalog: self.catalog.clone(),
3307                savepoints: Vec::new(),
3308            },
3309        );
3310        Ok(QueryResult::CommandOk {
3311            affected: 0,
3312            modified_catalog: false,
3313        })
3314    }
3315
3316    fn exec_commit(&mut self) -> Result<QueryResult, EngineError> {
3317        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
3318        let state = self
3319            .tx_catalogs
3320            .remove(&tx_id)
3321            .ok_or(EngineError::NoActiveTransaction)?;
3322        self.catalog = state.catalog;
3323        // All savepoints become permanent at COMMIT and the stack
3324        // resets for the next TX (`state.savepoints` is discarded with
3325        // `state`).
3326        Ok(QueryResult::CommandOk {
3327            affected: 0,
3328            modified_catalog: true,
3329        })
3330    }
3331
3332    fn exec_rollback(&mut self) -> Result<QueryResult, EngineError> {
3333        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
3334        if self.tx_catalogs.remove(&tx_id).is_none() {
3335            return Err(EngineError::NoActiveTransaction);
3336        }
3337        // savepoints discarded with the TxState
3338        Ok(QueryResult::CommandOk {
3339            affected: 0,
3340            modified_catalog: false,
3341        })
3342    }
3343
3344    fn exec_savepoint(&mut self, name: String) -> Result<QueryResult, EngineError> {
3345        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
3346        let state = self
3347            .tx_catalogs
3348            .get_mut(&tx_id)
3349            .ok_or(EngineError::NoActiveTransaction)?;
3350        // PG re-uses an existing savepoint name by dropping the older
3351        // entry and pushing a fresh one — match that behaviour so
3352        // application code can `SAVEPOINT sp; ...; SAVEPOINT sp` freely.
3353        state.savepoints.retain(|(n, _)| n != &name);
3354        let snapshot = state.catalog.clone();
3355        state.savepoints.push((name, snapshot));
3356        Ok(QueryResult::CommandOk {
3357            affected: 0,
3358            modified_catalog: false,
3359        })
3360    }
3361
3362    fn exec_rollback_to_savepoint(&mut self, name: &str) -> Result<QueryResult, EngineError> {
3363        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
3364        let state = self
3365            .tx_catalogs
3366            .get_mut(&tx_id)
3367            .ok_or(EngineError::NoActiveTransaction)?;
3368        let pos = state
3369            .savepoints
3370            .iter()
3371            .rposition(|(n, _)| n == name)
3372            .ok_or_else(|| {
3373                EngineError::Unsupported(alloc::format!("savepoint not found: {name}"))
3374            })?;
3375        // The savepoint stays on the stack (PG semantics): a later
3376        // `RELEASE` or further `ROLLBACK TO` is still allowed. Everything
3377        // after it is discarded.
3378        let snapshot = state.savepoints[pos].1.clone();
3379        state.savepoints.truncate(pos + 1);
3380        state.catalog = snapshot;
3381        Ok(QueryResult::CommandOk {
3382            affected: 0,
3383            modified_catalog: false,
3384        })
3385    }
3386
3387    fn exec_release_savepoint(&mut self, name: &str) -> Result<QueryResult, EngineError> {
3388        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
3389        let state = self
3390            .tx_catalogs
3391            .get_mut(&tx_id)
3392            .ok_or(EngineError::NoActiveTransaction)?;
3393        let pos = state
3394            .savepoints
3395            .iter()
3396            .rposition(|(n, _)| n == name)
3397            .ok_or_else(|| {
3398                EngineError::Unsupported(alloc::format!("savepoint not found: {name}"))
3399            })?;
3400        // RELEASE keeps the work since the savepoint, just discards the
3401        // bookmark plus everything nested under it.
3402        state.savepoints.truncate(pos);
3403        Ok(QueryResult::CommandOk {
3404            affected: 0,
3405            modified_catalog: false,
3406        })
3407    }
3408
3409    /// v6.0.4 — synchronous `ALTER INDEX <name> REBUILD [WITH
3410    /// (encoding = …)]`. Walks every table in the active catalog
3411    /// looking for an index matching `stmt.name`, then delegates the
3412    /// rebuild (including any encoding switch) to
3413    /// `Table::rebuild_nsw_index`. The "live" non-blocking
3414    /// optimisation is v6.0.4.1 / v6.1.x territory.
3415    /// v6.7.2 — `ALTER TABLE t SET hot_tier_bytes = X`. Dispatch
3416    /// arm. Currently the only setting is `hot_tier_bytes`; later
3417    /// v6.7.x can extend `AlterTableTarget` without touching this
3418    /// arm structure.
3419    fn exec_alter_table(
3420        &mut self,
3421        s: spg_sql::ast::AlterTableStatement,
3422    ) -> Result<QueryResult, EngineError> {
3423        // v7.13.2 — mailrs round-6 S1: apply each subaction in order.
3424        // On first error the statement aborts; subactions already
3425        // applied stay (no transactional rollback in v7.13 — wrap in
3426        // BEGIN/COMMIT if atomicity matters).
3427        let table_name = s.name.clone();
3428        for target in s.targets {
3429            self.exec_alter_table_subaction(&table_name, target)?;
3430        }
3431        Ok(QueryResult::CommandOk {
3432            affected: 0,
3433            modified_catalog: !self.in_transaction(),
3434        })
3435    }
3436
3437    fn exec_alter_table_subaction(
3438        &mut self,
3439        table_name_outer: &str,
3440        target: spg_sql::ast::AlterTableTarget,
3441    ) -> Result<(), EngineError> {
3442        // Inner helper retains the s.name closure shape; alias to `s`
3443        // for minimal diff against the v7.13.0 body.
3444        struct S<'a> {
3445            name: &'a str,
3446        }
3447        let s = S {
3448            name: table_name_outer,
3449        };
3450        match target {
3451            spg_sql::ast::AlterTableTarget::SetHotTierBytes(n) => {
3452                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
3453                    EngineError::Storage(StorageError::TableNotFound {
3454                        name: s.name.into(),
3455                    })
3456                })?;
3457                table.schema_mut().hot_tier_bytes = Some(n);
3458            }
3459            spg_sql::ast::AlterTableTarget::AddForeignKey(fk) => {
3460                // v7.6.8 — resolve FK against the live catalog first
3461                // (validates parent table, columns, indices). Then
3462                // verify every existing row in the child table
3463                // satisfies the new constraint. Then install it.
3464                let cols_snapshot = self
3465                    .active_catalog()
3466                    .get(s.name)
3467                    .ok_or_else(|| {
3468                        EngineError::Storage(StorageError::TableNotFound {
3469                            name: s.name.into(),
3470                        })
3471                    })?
3472                    .schema()
3473                    .columns
3474                    .clone();
3475                let storage_fk =
3476                    resolve_foreign_key(s.name, &cols_snapshot, fk, self.active_catalog())?;
3477                // Verify existing rows. Treat them as a virtual
3478                // INSERT batch — reusing the v7.6.2 enforce helper.
3479                let existing_rows: Vec<Vec<Value>> = self
3480                    .active_catalog()
3481                    .get(&s.name)
3482                    .expect("checked above")
3483                    .rows()
3484                    .iter()
3485                    .map(|r| r.values.clone())
3486                    .collect();
3487                enforce_fk_inserts(
3488                    self.active_catalog(),
3489                    s.name,
3490                    core::slice::from_ref(&storage_fk),
3491                    &existing_rows,
3492                )?;
3493                // Reject duplicate constraint name.
3494                let table = self
3495                    .active_catalog_mut()
3496                    .get_mut(s.name)
3497                    .expect("checked above");
3498                if let Some(name) = &storage_fk.name
3499                    && table
3500                        .schema()
3501                        .foreign_keys
3502                        .iter()
3503                        .any(|f| f.name.as_ref() == Some(name))
3504                {
3505                    return Err(EngineError::Unsupported(alloc::format!(
3506                        "ALTER TABLE ADD CONSTRAINT: a constraint named {name:?} already exists"
3507                    )));
3508                }
3509                table.schema_mut().foreign_keys.push(storage_fk);
3510            }
3511            spg_sql::ast::AlterTableTarget::DropForeignKey { name, if_exists } => {
3512                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
3513                    EngineError::Storage(StorageError::TableNotFound {
3514                        name: s.name.into(),
3515                    })
3516                })?;
3517                let fks = &mut table.schema_mut().foreign_keys;
3518                let before = fks.len();
3519                fks.retain(|f| f.name.as_ref() != Some(&name));
3520                if fks.len() == before && !if_exists {
3521                    return Err(EngineError::Unsupported(alloc::format!(
3522                        "ALTER TABLE DROP CONSTRAINT: no FK named {name:?} on {:?}",
3523                        s.name
3524                    )));
3525                }
3526                // v7.13.2 mailrs round-6 S7: IF EXISTS silences the miss.
3527            }
3528            spg_sql::ast::AlterTableTarget::AddColumn {
3529                column,
3530                if_not_exists,
3531            } => {
3532                // v7.13.0 — mailrs round-5 G1. Append-only column add
3533                // with back-fill of the DEFAULT (or NULL) into every
3534                // existing row. Column positions don't shift, so we
3535                // skip index rebuild.
3536                let clock = self.clock;
3537                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
3538                    EngineError::Storage(StorageError::TableNotFound {
3539                        name: s.name.into(),
3540                    })
3541                })?;
3542                if table
3543                    .schema()
3544                    .columns
3545                    .iter()
3546                    .any(|c| c.name.eq_ignore_ascii_case(&column.name))
3547                {
3548                    if if_not_exists {
3549                        return Ok(());
3550                    }
3551                    return Err(EngineError::Unsupported(alloc::format!(
3552                        "ALTER TABLE ADD COLUMN: column {:?} already exists on {:?}",
3553                        column.name,
3554                        s.name
3555                    )));
3556                }
3557                let col_name = column.name.clone();
3558                let nullable = column.nullable;
3559                let has_default =
3560                    column.default.is_some() || column.auto_increment;
3561                let col_schema = column_def_to_schema(column)?;
3562                let row_count = table.row_count();
3563                // Compute the back-fill value. Literal / runtime DEFAULT
3564                // funnels through the same resolver that INSERT uses
3565                // (v7.9.21 `resolve_column_default_free`). NULL when
3566                // the column is nullable and has no DEFAULT. NOT NULL
3567                // without DEFAULT errors when the table has existing
3568                // rows — same as PG.
3569                let fill_value: Value = if has_default
3570                    || col_schema.runtime_default.is_some()
3571                {
3572                    resolve_column_default_free(&col_schema, clock)?
3573                } else if nullable || row_count == 0 {
3574                    Value::Null
3575                } else {
3576                    return Err(EngineError::Unsupported(alloc::format!(
3577                        "ALTER TABLE ADD COLUMN {col_name:?}: NOT NULL column requires DEFAULT \
3578                         when the table has existing rows"
3579                    )));
3580                };
3581                table.add_column(col_schema, fill_value);
3582            }
3583            spg_sql::ast::AlterTableTarget::AlterColumnType {
3584                column,
3585                new_type,
3586                using,
3587            } => {
3588                // v7.13.0 — mailrs round-5 G8. Re-evaluate each
3589                // row's column value (either through the USING
3590                // expression if supplied, or as a direct CAST of
3591                // the existing value) and re-coerce to the new
3592                // type. Indices on the column get rebuilt.
3593                let new_data_type = column_type_to_data_type(new_type);
3594                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
3595                    EngineError::Storage(StorageError::TableNotFound {
3596                        name: s.name.into(),
3597                    })
3598                })?;
3599                let col_pos = table
3600                    .schema()
3601                    .columns
3602                    .iter()
3603                    .position(|c| c.name.eq_ignore_ascii_case(&column))
3604                    .ok_or_else(|| {
3605                        EngineError::Unsupported(alloc::format!(
3606                            "ALTER COLUMN TYPE: column {column:?} not found on {:?}",
3607                            s.name
3608                        ))
3609                    })?;
3610                let schema_cols = table.schema().columns.clone();
3611                let ctx = eval::EvalContext::new(&schema_cols, None);
3612                let mut new_values: alloc::vec::Vec<Value> =
3613                    alloc::vec::Vec::with_capacity(table.row_count());
3614                for row in table.rows().iter() {
3615                    let raw = match &using {
3616                        Some(expr) => eval::eval_expr(expr, row, &ctx).map_err(|e| {
3617                            EngineError::Unsupported(alloc::format!(
3618                                "ALTER COLUMN TYPE: USING expression failed: {e:?}"
3619                            ))
3620                        })?,
3621                        None => row.values.get(col_pos).cloned().unwrap_or(Value::Null),
3622                    };
3623                    let coerced = coerce_value(raw, new_data_type, &column, col_pos)?;
3624                    new_values.push(coerced);
3625                }
3626                table.schema_mut().columns[col_pos].ty = new_data_type;
3627                for (i, v) in new_values.into_iter().enumerate() {
3628                    let mut row_values = table
3629                        .rows()
3630                        .get(i)
3631                        .expect("bounds-checked above")
3632                        .values
3633                        .clone();
3634                    row_values[col_pos] = v;
3635                    table.update_row(i, row_values)?;
3636                }
3637            }
3638            spg_sql::ast::AlterTableTarget::AddTableConstraint(tc) => {
3639                // v7.14.0 — pg_dump emits PKs as a separate
3640                // ALTER TABLE ADD CONSTRAINT post-CREATE-TABLE.
3641                // For PRIMARY KEY / UNIQUE, install a UC entry
3642                // and the implicit BTree index on the leading
3643                // column. CHECK: append predicate to schema.
3644                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
3645                    EngineError::Storage(StorageError::TableNotFound {
3646                        name: s.name.into(),
3647                    })
3648                })?;
3649                let is_pk = matches!(
3650                    tc,
3651                    spg_sql::ast::TableConstraint::PrimaryKey { .. }
3652                );
3653                match tc {
3654                    spg_sql::ast::TableConstraint::PrimaryKey { columns, .. }
3655                    | spg_sql::ast::TableConstraint::Unique { columns, .. } => {
3656                        let positions: Vec<usize> = columns
3657                            .iter()
3658                            .map(|c| {
3659                                table
3660                                    .schema()
3661                                    .columns
3662                                    .iter()
3663                                    .position(|sc| sc.name.eq_ignore_ascii_case(c))
3664                                    .ok_or_else(|| {
3665                                        EngineError::Unsupported(alloc::format!(
3666                                            "ALTER TABLE ADD CONSTRAINT: column {c:?} not found on {:?}",
3667                                            s.name
3668                                        ))
3669                                    })
3670                            })
3671                            .collect::<Result<Vec<_>, _>>()?;
3672                        // Skip if an equivalent UC is already there
3673                        // (idempotent — pg_dump's PK + a prior inline
3674                        // PK shouldn't double-install).
3675                        let already = table
3676                            .schema()
3677                            .uniqueness_constraints
3678                            .iter()
3679                            .any(|u| u.columns == positions);
3680                        if !already {
3681                            table.schema_mut().uniqueness_constraints.push(
3682                                spg_storage::UniquenessConstraint {
3683                                    is_primary_key: is_pk,
3684                                    columns: positions.clone(),
3685                                    nulls_not_distinct: false,
3686                                },
3687                            );
3688                            // PK implies NOT NULL on referenced cols.
3689                            if is_pk {
3690                                for p in &positions {
3691                                    if let Some(c) = table.schema_mut().columns.get_mut(*p) {
3692                                        c.nullable = false;
3693                                    }
3694                                }
3695                            }
3696                            // Add a BTree index on the leading
3697                            // column for INSERT-side enforcement.
3698                            let leading = &columns[0];
3699                            let already_idx = table.indices().iter().any(|idx| {
3700                                matches!(idx.kind, spg_storage::IndexKind::BTree(_))
3701                                    && table.schema().columns[idx.column_position].name
3702                                        == *leading
3703                            });
3704                            if !already_idx {
3705                                let suffix = if is_pk { "pkey" } else { "key" };
3706                                let idx_name = alloc::format!("{}_{leading}_{suffix}", s.name);
3707                                let _ = table.add_index(idx_name, leading);
3708                            }
3709                        }
3710                    }
3711                    spg_sql::ast::TableConstraint::Check { expr, .. } => {
3712                        table.schema_mut().checks.push(alloc::format!("{expr}"));
3713                    }
3714                    spg_sql::ast::TableConstraint::Index { name, columns } => {
3715                        // v7.15.0 — ALTER TABLE ADD KEY (cols).
3716                        // mysqldump occasionally emits this
3717                        // post-CREATE-TABLE shape; build a BTree
3718                        // on the leading column using the
3719                        // user-supplied or synthesised name.
3720                        let leading = &columns[0];
3721                        let already_idx = table.indices().iter().any(|idx| {
3722                            matches!(idx.kind, spg_storage::IndexKind::BTree(_))
3723                                && table.schema().columns[idx.column_position].name == *leading
3724                        });
3725                        if !already_idx {
3726                            let idx_name = name
3727                                .clone()
3728                                .unwrap_or_else(|| alloc::format!("{}_{leading}_idx", s.name));
3729                            let _ = table.add_index(idx_name, leading);
3730                        }
3731                    }
3732                }
3733            }
3734            spg_sql::ast::AlterTableTarget::DropColumn {
3735                column,
3736                if_exists,
3737                cascade,
3738            } => {
3739                // v7.13.3 — mailrs round-7 S8. Remove the column +
3740                // every row's value at that position; drop any index
3741                // on the column. RESTRICT (default) rejects when an
3742                // FK on this table or partial-index predicate
3743                // references the column; CASCADE removes those
3744                // dependents first.
3745                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
3746                    EngineError::Storage(StorageError::TableNotFound {
3747                        name: s.name.into(),
3748                    })
3749                })?;
3750                let col_pos = match table
3751                    .schema()
3752                    .columns
3753                    .iter()
3754                    .position(|c| c.name.eq_ignore_ascii_case(&column))
3755                {
3756                    Some(p) => p,
3757                    None => {
3758                        if if_exists {
3759                            return Ok(());
3760                        }
3761                        return Err(EngineError::Unsupported(alloc::format!(
3762                            "ALTER TABLE DROP COLUMN: column {column:?} not found on {:?}",
3763                            s.name
3764                        )));
3765                    }
3766                };
3767                // Dependent check: FKs whose local columns include
3768                // col_pos. CASCADE drops them; otherwise reject.
3769                let dependent_fks: Vec<usize> = table
3770                    .schema()
3771                    .foreign_keys
3772                    .iter()
3773                    .enumerate()
3774                    .filter_map(|(i, fk)| {
3775                        if fk.local_columns.contains(&col_pos) {
3776                            Some(i)
3777                        } else {
3778                            None
3779                        }
3780                    })
3781                    .collect();
3782                if !dependent_fks.is_empty() && !cascade {
3783                    return Err(EngineError::Unsupported(alloc::format!(
3784                        "ALTER TABLE DROP COLUMN {column:?}: column has FK dependents; \
3785                         use DROP COLUMN ... CASCADE to remove them"
3786                    )));
3787                }
3788                // CASCADE the FK removals first.
3789                if cascade {
3790                    // Drop in reverse so indices stay valid.
3791                    let mut sorted = dependent_fks.clone();
3792                    sorted.sort();
3793                    sorted.reverse();
3794                    let fks = &mut table.schema_mut().foreign_keys;
3795                    for i in sorted {
3796                        fks.remove(i);
3797                    }
3798                }
3799                // Drop the column. New helper on Table does the
3800                // row + schema + index shift atomically.
3801                table.drop_column(col_pos);
3802            }
3803            spg_sql::ast::AlterTableTarget::RenameColumn { old, new } => {
3804                // v7.15.0 — `ALTER TABLE t RENAME [COLUMN] old TO
3805                // new`. Rename the column in the schema; rewrite
3806                // every stored source string on this table that
3807                // references it as a (potentially-qualified)
3808                // column identifier: CHECK predicates, partial-
3809                // index predicates, runtime DEFAULT expressions.
3810                // Then walk catalog triggers on this table and
3811                // patch any `UPDATE OF` column list. Function and
3812                // trigger bodies are NOT auto-rewritten — that
3813                // surface is dynamic SQL territory; users update
3814                // those separately (matches PG plpgsql behavior:
3815                // a column rename invalidates name-referencing
3816                // plpgsql at call time, not rename time).
3817                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
3818                    EngineError::Storage(StorageError::TableNotFound {
3819                        name: s.name.into(),
3820                    })
3821                })?;
3822                let col_pos = table
3823                    .schema()
3824                    .columns
3825                    .iter()
3826                    .position(|c| c.name.eq_ignore_ascii_case(&old))
3827                    .ok_or_else(|| {
3828                        EngineError::Unsupported(alloc::format!(
3829                            "ALTER TABLE RENAME COLUMN: column {old:?} not found on {:?}",
3830                            s.name
3831                        ))
3832                    })?;
3833                // Reject same-name (case-insensitive) collision.
3834                if table
3835                    .schema()
3836                    .columns
3837                    .iter()
3838                    .enumerate()
3839                    .any(|(i, c)| i != col_pos && c.name.eq_ignore_ascii_case(&new))
3840                {
3841                    return Err(EngineError::Unsupported(alloc::format!(
3842                        "ALTER TABLE RENAME COLUMN: column {new:?} already exists on {:?}",
3843                        s.name
3844                    )));
3845                }
3846                // Schema rename first — even idempotent same-name
3847                // rename (`ALTER TABLE t RENAME a TO a`) needs to
3848                // be a no-op, not an error.
3849                if old.eq_ignore_ascii_case(&new) {
3850                    return Ok(());
3851                }
3852                table.rename_column(col_pos, &new);
3853                // Rewrite per-column runtime_default sources on
3854                // every column of this table — a DEFAULT expression
3855                // on column X may reference column Y by name (rare,
3856                // but legal in PG when the value is supplied via a
3857                // function that takes the row).
3858                let n_cols = table.schema().columns.len();
3859                for i in 0..n_cols {
3860                    let rt = table.schema().columns[i].runtime_default.clone();
3861                    if let Some(src) = rt {
3862                        let rewritten = rewrite_column_in_source(&src, &old, &new)?;
3863                        table.schema_mut().columns[i].runtime_default = Some(rewritten);
3864                    }
3865                }
3866                // Rewrite table-level CHECK predicates.
3867                let checks = table.schema().checks.clone();
3868                let mut new_checks = Vec::with_capacity(checks.len());
3869                for chk in checks {
3870                    new_checks.push(rewrite_column_in_source(&chk, &old, &new)?);
3871                }
3872                table.schema_mut().checks = new_checks;
3873                // Rewrite per-index partial_predicate sources.
3874                let n_idx = table.indices().len();
3875                for i in 0..n_idx {
3876                    let pred = table.indices()[i].partial_predicate.clone();
3877                    if let Some(src) = pred {
3878                        let rewritten = rewrite_column_in_source(&src, &old, &new)?;
3879                        // SAFETY: indices_mut would be cleanest, but
3880                        // partial_predicate is the only mutable field
3881                        // here; reach in via the public mut accessor.
3882                        table.set_partial_predicate(i, Some(rewritten));
3883                    }
3884                }
3885                // Walk catalog triggers; patch `update_columns` on
3886                // triggers attached to this table.
3887                let table_name = s.name.to_string();
3888                for trig in self.active_catalog_mut().triggers_mut() {
3889                    if !trig.table.eq_ignore_ascii_case(&table_name) {
3890                        continue;
3891                    }
3892                    for c in &mut trig.update_columns {
3893                        if c.eq_ignore_ascii_case(&old) {
3894                            *c = new.clone();
3895                        }
3896                    }
3897                }
3898            }
3899        }
3900        Ok(())
3901    }
3902
3903    fn exec_alter_index(
3904        &mut self,
3905        stmt: spg_sql::ast::AlterIndexStatement,
3906    ) -> Result<QueryResult, EngineError> {
3907        // Translate the optional SQL-side encoding choice into the
3908        // storage-side enum; the same SqlVecEncoding -> VecEncoding
3909        // bridge `column_type_to_data_type` uses.
3910        let spg_sql::ast::AlterIndexStatement {
3911            name: idx_name,
3912            target,
3913        } = stmt;
3914        let spg_sql::ast::AlterIndexTarget::Rebuild { encoding } = target;
3915        let target = encoding.map(|e| match e {
3916            SqlVecEncoding::F32 => VecEncoding::F32,
3917            SqlVecEncoding::Sq8 => VecEncoding::Sq8,
3918            SqlVecEncoding::F16 => VecEncoding::F16,
3919        });
3920        // Linear scan: index names are globally unique within a
3921        // catalog (enforced by add_nsw_index_inner) so the first
3922        // match is the only one. Save the table name to avoid
3923        // borrowing while we then take a mut borrow.
3924        let table_name = {
3925            let cat = self.active_catalog();
3926            let mut found: Option<String> = None;
3927            for tname in cat.table_names() {
3928                if let Some(t) = cat.get(&tname)
3929                    && t.indices().iter().any(|i| i.name == idx_name)
3930                {
3931                    found = Some(tname);
3932                    break;
3933                }
3934            }
3935            found.ok_or_else(|| {
3936                EngineError::Storage(StorageError::IndexNotFound {
3937                    name: idx_name.clone(),
3938                })
3939            })?
3940        };
3941        let table = self
3942            .active_catalog_mut()
3943            .get_mut(&table_name)
3944            .expect("table found above");
3945        table.rebuild_nsw_index(&idx_name, target)?;
3946        // v6.3.1 — ALTER INDEX REBUILD potentially with new encoding
3947        // changes cost characteristics; evict any cached plans.
3948        self.plan_cache.evict_referencing(&table_name);
3949        Ok(QueryResult::CommandOk {
3950            affected: 0,
3951            modified_catalog: !self.in_transaction(),
3952        })
3953    }
3954
3955    fn exec_create_index(
3956        &mut self,
3957        stmt: CreateIndexStatement,
3958    ) -> Result<QueryResult, EngineError> {
3959        let table = self
3960            .active_catalog_mut()
3961            .get_mut(&stmt.table)
3962            .ok_or_else(|| {
3963                EngineError::Storage(StorageError::TableNotFound {
3964                    name: stmt.table.clone(),
3965                })
3966            })?;
3967        // `IF NOT EXISTS` reduces DuplicateIndex to a no-op CommandOk.
3968        if stmt.if_not_exists && table.indices().iter().any(|i| i.name == stmt.name) {
3969            return Ok(QueryResult::CommandOk {
3970                affected: 0,
3971                modified_catalog: false,
3972            });
3973        }
3974        // v7.9.14 — multi-column index parses through; engine
3975        // builds a single-column BTree on the leading column only.
3976        // The extras live on the AST so spg-server's dispatcher
3977        // can emit a PG-wire NoticeResponse / log line. Composite
3978        // BTree keys land in v7.10.
3979        let _ = &stmt.extra_columns; // intentional drop on engine side
3980        let table_name = stmt.table.clone();
3981        // v6.8.0 — resolve INCLUDE column names to positions. Done
3982        // before `add_index` so a typo error surfaces before any
3983        // catalog mutation lands.
3984        let included_positions: Vec<usize> = if stmt.included_columns.is_empty() {
3985            Vec::new()
3986        } else {
3987            let schema = table.schema();
3988            stmt.included_columns
3989                .iter()
3990                .map(|c| {
3991                    schema.column_position(c).ok_or_else(|| {
3992                        EngineError::Storage(StorageError::ColumnNotFound { column: c.clone() })
3993                    })
3994                })
3995                .collect::<Result<Vec<_>, _>>()?
3996        };
3997        match stmt.method {
3998            IndexMethod::BTree => table.add_index(stmt.name.clone(), &stmt.column)?,
3999            IndexMethod::Hnsw => {
4000                if !included_positions.is_empty() {
4001                    return Err(EngineError::Unsupported(
4002                        "INCLUDE columns are not supported on HNSW indexes".into(),
4003                    ));
4004                }
4005                table.add_nsw_index(stmt.name.clone(), &stmt.column, spg_storage::NSW_DEFAULT_M)?;
4006            }
4007            // v6.7.1 — BRIN. Pure metadata; no in-memory data.
4008            IndexMethod::Brin => {
4009                if !included_positions.is_empty() {
4010                    return Err(EngineError::Unsupported(
4011                        "INCLUDE columns are not supported on BRIN indexes".into(),
4012                    ));
4013                }
4014                table.add_brin_index(stmt.name.clone(), &stmt.column)?;
4015            }
4016            // v7.12.3 — GIN inverted index. Real posting-list-backed
4017            // GIN when the indexed column is `tsvector`; falls back
4018            // to a BTree on the leading column for any other column
4019            // type so v7.9.26b's `pg_dump` compatibility (GIN on
4020            // JSONB etc. silently loading as BTree) is preserved.
4021            // Operators see the real GIN only where it matters; old
4022            // schemas keep loading.
4023            IndexMethod::Gin => {
4024                if !included_positions.is_empty() {
4025                    return Err(EngineError::Unsupported(
4026                        "INCLUDE columns are not supported on GIN indexes".into(),
4027                    ));
4028                }
4029                let col_pos = table
4030                    .schema()
4031                    .column_position(&stmt.column)
4032                    .ok_or_else(|| {
4033                        EngineError::Storage(StorageError::ColumnNotFound {
4034                            column: stmt.column.clone(),
4035                        })
4036                    })?;
4037                let col_ty = table.schema().columns[col_pos].ty;
4038                // v7.15.0 — `gin_trgm_ops` on a TEXT/VARCHAR
4039                // column dispatches to the real trigram-shingle
4040                // GIN build (LIKE / similarity acceleration).
4041                // Other GIN opclasses fall through to the regular
4042                // tsvector-vs-BTree split below.
4043                let is_trgm = stmt
4044                    .opclass
4045                    .as_deref()
4046                    .is_some_and(|op| op.eq_ignore_ascii_case("gin_trgm_ops"));
4047                if is_trgm
4048                    && matches!(
4049                        col_ty,
4050                        spg_storage::DataType::Text | spg_storage::DataType::Varchar(_)
4051                    )
4052                {
4053                    table
4054                        .add_gin_trgm_index(stmt.name.clone(), &stmt.column)
4055                        .map_err(EngineError::Storage)?;
4056                } else if col_ty == spg_storage::DataType::TsVector {
4057                    table
4058                        .add_gin_index(stmt.name.clone(), &stmt.column)
4059                        .map_err(EngineError::Storage)?;
4060                } else {
4061                    // v7.9.26b BTree fallback — the catalog still
4062                    // gets an index entry on the leading column so
4063                    // pg_dump scripts that name GIN on JSONB / etc.
4064                    // load clean; query-time gain stays opt-in for
4065                    // tsvector callers.
4066                    table.add_index(stmt.name.clone(), &stmt.column)?;
4067                }
4068            }
4069        }
4070        if !included_positions.is_empty()
4071            && let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name)
4072        {
4073            idx.included_columns = included_positions;
4074        }
4075        // v6.8.1 — persist partial-index predicate. Stored as the
4076        // expression's Display form so the catalog snapshot stays
4077        // pure (storage has no spg-sql dependency). The runtime
4078        // maintenance path treats partial indexes identically to
4079        // full indexes for v6.8.1 (over-maintenance is safe; the
4080        // planner-side "use partial when query WHERE implies the
4081        // predicate" pass is STABILITY carve-out).
4082        if let Some(pred_expr) = &stmt.partial_predicate {
4083            let canonical = pred_expr.to_string();
4084            // v7.13.2 — mailrs round-6 S2. PG's `pg_trgm` uses
4085            // `CREATE INDEX … USING gin(col gin_trgm_ops) WHERE …`
4086            // routinely to slim trigram indexes. SPG now persists
4087            // the predicate for GIN / BRIN / HNSW the same way it
4088            // already does for BTree — same v6.8.1 "over-maintain
4089            // is safe; planner-side partial routing is STABILITY
4090            // carve-out" semantics. HNSW carries an additional
4091            // caveat: the predicate isn't applied at index build
4092            // time (would require per-row eval inside the NSW
4093            // construction loop), so the index oversamples; query
4094            // time the WHERE clause still filters correctly.
4095            if let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name) {
4096                idx.partial_predicate = Some(canonical);
4097            }
4098        }
4099        // v6.8.2 — persist expression index key. Same Display-form
4100        // storage; the runtime maintenance pass evaluates each
4101        // row's expression to derive the index key, but for v6.8.2
4102        // the engine falls through to the bare-column-reference
4103        // path and the expression is preserved for format-layer
4104        // round-trip + future planner work. Carved-out in
4105        // STABILITY § "Out of v6.8".
4106        if let Some(key_expr) = &stmt.expression {
4107            if matches!(
4108                stmt.method,
4109                IndexMethod::Hnsw | IndexMethod::Brin | IndexMethod::Gin
4110            ) {
4111                return Err(EngineError::Unsupported(
4112                    "Expression keys are not supported on HNSW or BRIN indexes".into(),
4113                ));
4114            }
4115            let canonical = key_expr.to_string();
4116            if let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name) {
4117                idx.expression = Some(canonical);
4118            }
4119        }
4120        // v7.9.29 — persist `is_unique` flag on the storage Index.
4121        // Combined with `partial_predicate`, INSERT enforcement
4122        // checks that no other row whose predicate evaluates true
4123        // shares the same indexed key. Parser already rejected
4124        // `UNIQUE` on HNSW / BRIN, so plain BTree here.
4125        // For multi-column UNIQUE INDEX the extras matter (the
4126        // full tuple is the uniqueness key), so resolve them to
4127        // column positions and persist on the index too.
4128        if stmt.is_unique {
4129            let mut extra_positions: alloc::vec::Vec<usize> = alloc::vec::Vec::new();
4130            for col_name in &stmt.extra_columns {
4131                let pos = table
4132                    .schema()
4133                    .columns
4134                    .iter()
4135                    .position(|c| c.name.eq_ignore_ascii_case(col_name))
4136                    .ok_or_else(|| {
4137                        EngineError::Unsupported(alloc::format!(
4138                            "UNIQUE INDEX {:?}: extra column {col_name:?} not in table {:?}",
4139                            stmt.name,
4140                            stmt.table
4141                        ))
4142                    })?;
4143                extra_positions.push(pos);
4144            }
4145            if let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name) {
4146                idx.is_unique = true;
4147                idx.extra_column_positions = extra_positions;
4148            }
4149            // At index-creation time, check the existing rows for
4150            // pre-existing duplicates that would have violated the
4151            // new constraint — otherwise CREATE UNIQUE INDEX would
4152            // silently leave duplicates in place.
4153            let snapshot_indices = table.indices().to_vec();
4154            let snapshot_rows: alloc::vec::Vec<spg_storage::Row> =
4155                table.rows().iter().cloned().collect();
4156            let snapshot_schema = table.schema().clone();
4157            let idx_ref = snapshot_indices
4158                .iter()
4159                .find(|i| i.name == stmt.name)
4160                .expect("just-added index");
4161            check_existing_unique_violation(idx_ref, &snapshot_schema, &snapshot_rows)?;
4162        }
4163        // v6.3.1 — adding an index can change the optimal plan for
4164        // any cached query that references this table.
4165        self.plan_cache.evict_referencing(&table_name);
4166        Ok(QueryResult::CommandOk {
4167            affected: 0,
4168            modified_catalog: !self.in_transaction(),
4169        })
4170    }
4171
4172    /// v7.13.3 — mailrs round-7 S9. SPG-specific reconciliation
4173    /// for `CREATE TABLE IF NOT EXISTS` when the table already
4174    /// exists. Adds missing columns + inline FKs from the new
4175    /// definition; existing columns / constraints stay untouched.
4176    /// New columns with a `NOT NULL` declaration without a
4177    /// `DEFAULT` are reported as a clear error rather than
4178    /// silently dropped — this is the "fail loud on real
4179    /// incompatibility, fail silent on schema-superset" tradeoff.
4180    fn reconcile_table_if_not_exists(
4181        &mut self,
4182        stmt: CreateTableStatement,
4183    ) -> Result<QueryResult, EngineError> {
4184        let table_name = stmt.name.clone();
4185        let clock = self.clock;
4186        let existing_col_names: alloc::collections::BTreeSet<String> = self
4187            .active_catalog()
4188            .get(&table_name)
4189            .expect("checked above")
4190            .schema()
4191            .columns
4192            .iter()
4193            .map(|c| c.name.to_ascii_lowercase())
4194            .collect();
4195        let row_count = self
4196            .active_catalog()
4197            .get(&table_name)
4198            .expect("checked above")
4199            .row_count();
4200        // Collect missing column defs in source order.
4201        let new_columns: alloc::vec::Vec<spg_sql::ast::ColumnDef> = stmt
4202            .columns
4203            .iter()
4204            .filter(|c| !existing_col_names.contains(&c.name.to_ascii_lowercase()))
4205            .cloned()
4206            .collect();
4207        for col_def in new_columns {
4208            let col_name = col_def.name.clone();
4209            let nullable = col_def.nullable;
4210            let has_default = col_def.default.is_some() || col_def.auto_increment;
4211            let col_schema = column_def_to_schema(col_def)?;
4212            let fill_value: Value = if has_default || col_schema.runtime_default.is_some() {
4213                resolve_column_default_free(&col_schema, clock)?
4214            } else if nullable || row_count == 0 {
4215                Value::Null
4216            } else {
4217                return Err(EngineError::Unsupported(alloc::format!(
4218                    "CREATE TABLE IF NOT EXISTS {table_name:?}: reconciling \
4219                     column {col_name:?} requires DEFAULT (existing rows would violate NOT NULL)"
4220                )));
4221            };
4222            let table = self
4223                .active_catalog_mut()
4224                .get_mut(&table_name)
4225                .expect("checked above");
4226            table.add_column(col_schema, fill_value);
4227        }
4228        // Resolve any newly-added inline FKs (column-level
4229        // REFERENCES forms) and install. Skip FKs whose local
4230        // columns we didn't have in the existing table.
4231        let table_cols_now = self
4232            .active_catalog()
4233            .get(&table_name)
4234            .expect("checked above")
4235            .schema()
4236            .columns
4237            .clone();
4238        for fk in stmt.foreign_keys {
4239            // Only install FKs whose every local column resolves
4240            // — older catalogs may have a column the new FK
4241            // references but not the column the new FK declares.
4242            let all_resolved = fk
4243                .columns
4244                .iter()
4245                .all(|c| table_cols_now.iter().any(|sc| sc.name.eq_ignore_ascii_case(c)));
4246            if !all_resolved {
4247                continue;
4248            }
4249            let already_present = {
4250                let table = self
4251                    .active_catalog()
4252                    .get(&table_name)
4253                    .expect("checked above");
4254                table.schema().foreign_keys.iter().any(|f| {
4255                    f.parent_table.eq_ignore_ascii_case(&fk.parent_table)
4256                        && f.local_columns.len() == fk.columns.len()
4257                })
4258            };
4259            if already_present {
4260                continue;
4261            }
4262            let storage_fk =
4263                resolve_foreign_key(&table_name, &table_cols_now, fk, self.active_catalog())?;
4264            let table = self
4265                .active_catalog_mut()
4266                .get_mut(&table_name)
4267                .expect("checked above");
4268            table.schema_mut().foreign_keys.push(storage_fk);
4269        }
4270        Ok(QueryResult::CommandOk {
4271            affected: 0,
4272            modified_catalog: !self.in_transaction(),
4273        })
4274    }
4275
4276    /// v7.14.0 — DROP TABLE handler (pg_dump / mysqldump preamble).
4277    fn exec_drop_table(
4278        &mut self,
4279        names: Vec<String>,
4280        if_exists: bool,
4281    ) -> Result<QueryResult, EngineError> {
4282        for name in names {
4283            let dropped = self.active_catalog_mut().drop_table(&name);
4284            if !dropped && !if_exists {
4285                return Err(EngineError::Storage(StorageError::TableNotFound { name }));
4286            }
4287        }
4288        Ok(QueryResult::CommandOk {
4289            affected: 0,
4290            modified_catalog: !self.in_transaction(),
4291        })
4292    }
4293
4294    /// v7.14.0 — DROP INDEX handler.
4295    fn exec_drop_index(
4296        &mut self,
4297        name: String,
4298        if_exists: bool,
4299    ) -> Result<QueryResult, EngineError> {
4300        let dropped = self.active_catalog_mut().drop_named_index(&name);
4301        if !dropped && !if_exists {
4302            return Err(EngineError::Storage(StorageError::IndexNotFound { name }));
4303        }
4304        Ok(QueryResult::CommandOk {
4305            affected: 0,
4306            modified_catalog: !self.in_transaction(),
4307        })
4308    }
4309
4310    fn exec_create_table(
4311        &mut self,
4312        stmt: CreateTableStatement,
4313    ) -> Result<QueryResult, EngineError> {
4314        if stmt.if_not_exists && self.active_catalog().get(&stmt.name).is_some() {
4315            // v7.13.3 — mailrs round-7 S9 reconciliation. PG's
4316            // semantics for `CREATE TABLE IF NOT EXISTS` is a
4317            // silent no-op when the table exists, even if the new
4318            // definition adds columns or constraints. SPG extends
4319            // this: any column in the new definition that's
4320            // missing from the existing table is added (with
4321            // DEFAULT back-fill / NULL); inline FKs likewise.
4322            // Existing columns are NOT modified. This makes
4323            // mailrs's schema layering (init-schema's `contacts`
4324            // sender-tracking table + migrate-023's CardDAV
4325            // `contacts` extension) converge correctly without
4326            // mailrs-side edits. PG users who want PG-strict
4327            // silent-no-op behaviour can use SPG's `--strict-pg`
4328            // flag (deferred to v7.14).
4329            return self.reconcile_table_if_not_exists(stmt);
4330        }
4331        let table_name = stmt.name.clone();
4332        // v7.9.13 — pluck the names of any columns marked
4333        // `PRIMARY KEY` inline so the post-create-table pass can
4334        // build an implicit BTree index. mailrs F1.
4335        let inline_pk_columns: Vec<String> = stmt
4336            .columns
4337            .iter()
4338            .filter(|c| c.is_primary_key)
4339            .map(|c| c.name.clone())
4340            .collect();
4341        // v7.9.19 — table-level constraints: PRIMARY KEY (a, b, ...)
4342        // and UNIQUE (a, b, ...). Each builds a BTree index on the
4343        // leading column (the existing single-column storage tier)
4344        // and registers a UniquenessConstraint on the schema for
4345        // INSERT-time enforcement of the full tuple. mailrs G1/G6.
4346        let cols = stmt
4347            .columns
4348            .into_iter()
4349            .map(column_def_to_schema)
4350            .collect::<Result<Vec<_>, _>>()?;
4351        // Composite NOT-NULL implication for PRIMARY KEY columns.
4352        let mut cols = cols;
4353        for tc in &stmt.table_constraints {
4354            if let spg_sql::ast::TableConstraint::PrimaryKey { columns, .. } = tc {
4355                for col_name in columns {
4356                    if let Some(col) = cols.iter_mut().find(|c| c.name == *col_name) {
4357                        col.nullable = false;
4358                    }
4359                }
4360            }
4361        }
4362        // v7.6.1 — resolve every FK in the statement against the
4363        // already-known catalog. Validates: parent table exists,
4364        // parent column names exist, arity matches, parent columns
4365        // have a PK / UNIQUE index. Self-referencing FKs (parent
4366        // table == this table) resolve against the column list we
4367        // just built — they don't need the catalog yet.
4368        let mut fks: Vec<spg_storage::ForeignKeyConstraint> =
4369            Vec::with_capacity(stmt.foreign_keys.len());
4370        for fk in stmt.foreign_keys {
4371            // v7.14.0 — when SET FOREIGN_KEY_CHECKS=0 is in effect
4372            // (mysqldump preamble + bulk imports), defer FK
4373            // resolution if the parent table isn't in the catalog
4374            // yet. The FK is queued and resolved when checks flip
4375            // back on. Self-references stay in-band (the parent is
4376            // the same as the child we're building).
4377            let needs_parent = !fk.parent_table.eq_ignore_ascii_case(&table_name);
4378            if !self.foreign_key_checks
4379                && needs_parent
4380                && self.active_catalog().get(&fk.parent_table).is_none()
4381            {
4382                self.pending_foreign_keys
4383                    .push((table_name.clone(), fk));
4384                continue;
4385            }
4386            fks.push(resolve_foreign_key(
4387                &table_name,
4388                &cols,
4389                fk,
4390                self.active_catalog(),
4391            )?);
4392        }
4393        let mut schema = TableSchema::new(table_name.clone(), cols);
4394        schema.foreign_keys = fks;
4395        // v7.9.19 — translate AST table_constraints to storage
4396        // UniquenessConstraints (column name → position) so the
4397        // INSERT enforcement helper sees positions directly.
4398        let mut uc_storage: Vec<spg_storage::UniquenessConstraint> = Vec::new();
4399        let mut check_exprs: Vec<String> = Vec::new();
4400        for tc in &stmt.table_constraints {
4401            let (is_pk, names, nnd) = match tc {
4402                spg_sql::ast::TableConstraint::PrimaryKey { columns, .. } => {
4403                    (true, columns.clone(), false)
4404                }
4405                spg_sql::ast::TableConstraint::Unique {
4406                    columns,
4407                    nulls_not_distinct,
4408                    ..
4409                } => (false, columns.clone(), *nulls_not_distinct),
4410                spg_sql::ast::TableConstraint::Check { expr, .. } => {
4411                    // v7.13.0 — collect CHECK predicate sources;
4412                    // they get attached to the schema below.
4413                    check_exprs.push(alloc::format!("{expr}"));
4414                    continue;
4415                }
4416                // v7.15.0 — plain `KEY (cols)` from MySQL inline
4417                // is NOT a uniqueness constraint; skip the UC
4418                // build path entirely. The BTree index lands in
4419                // the post-create loop below alongside the PK/UQ
4420                // implicit indexes.
4421                spg_sql::ast::TableConstraint::Index { .. } => continue,
4422            };
4423            let mut positions = Vec::with_capacity(names.len());
4424            for n in &names {
4425                let pos = schema
4426                    .columns
4427                    .iter()
4428                    .position(|c| c.name == *n)
4429                    .ok_or_else(|| {
4430                        EngineError::Unsupported(alloc::format!(
4431                            "table constraint references unknown column {n:?}"
4432                        ))
4433                    })?;
4434                positions.push(pos);
4435            }
4436            uc_storage.push(spg_storage::UniquenessConstraint {
4437                is_primary_key: is_pk,
4438                columns: positions,
4439                nulls_not_distinct: nnd,
4440            });
4441        }
4442        schema.uniqueness_constraints = uc_storage.clone();
4443        schema.checks = check_exprs;
4444        self.active_catalog_mut().create_table(schema)?;
4445        // v7.9.13 — implicit BTree per inline PK column +
4446        // v7.9.19 — implicit BTree on the leading column of every
4447        // table-level PRIMARY KEY / UNIQUE constraint.
4448        let table = self
4449            .active_catalog_mut()
4450            .get_mut(&table_name)
4451            .expect("just created");
4452        for (i, col_name) in inline_pk_columns.iter().enumerate() {
4453            let idx_name = if inline_pk_columns.len() == 1 {
4454                alloc::format!("{table_name}_pkey")
4455            } else {
4456                alloc::format!("{table_name}_pkey_{i}")
4457            };
4458            if let Err(e) = table.add_index(idx_name, col_name) {
4459                return Err(EngineError::Storage(e));
4460            }
4461        }
4462        for (i, tc) in stmt.table_constraints.iter().enumerate() {
4463            // v7.15.0 — plain KEY/INDEX rides this same loop so
4464            // the implicit BTree gets built. It carries its own
4465            // user-supplied name; PK/UQ still synthesise.
4466            let (suffix, names, explicit_name): (&str, &Vec<String>, Option<&String>) = match tc {
4467                spg_sql::ast::TableConstraint::PrimaryKey { columns, .. } => {
4468                    ("pkey", columns, None)
4469                }
4470                spg_sql::ast::TableConstraint::Unique { columns, .. } => ("key", columns, None),
4471                spg_sql::ast::TableConstraint::Index { name, columns } => {
4472                    ("idx", columns, name.as_ref())
4473                }
4474                spg_sql::ast::TableConstraint::Check { .. } => continue,
4475            };
4476            let leading = &names[0];
4477            // Skip if a same-column BTree already exists (e.g.
4478            // inline PK on the leading column).
4479            let already = table.indices().iter().any(|idx| {
4480                matches!(idx.kind, spg_storage::IndexKind::BTree(_))
4481                    && table.schema().columns[idx.column_position].name == *leading
4482            });
4483            if already {
4484                continue;
4485            }
4486            let idx_name = if let Some(n) = explicit_name {
4487                n.clone()
4488            } else if names.len() == 1 {
4489                alloc::format!("{table_name}_{leading}_{suffix}")
4490            } else {
4491                alloc::format!("{table_name}_{leading}_{suffix}_{i}")
4492            };
4493            if let Err(e) = table.add_index(idx_name, leading) {
4494                return Err(EngineError::Storage(e));
4495            }
4496        }
4497        Ok(QueryResult::CommandOk {
4498            affected: 0,
4499            modified_catalog: !self.in_transaction(),
4500        })
4501    }
4502
4503    fn exec_insert(&mut self, stmt: InsertStatement) -> Result<QueryResult, EngineError> {
4504        // v7.13.0 — `INSERT INTO t [(cols)] SELECT …` (mailrs
4505        // round-5 G4). Execute the inner SELECT first, then route
4506        // back through the regular VALUES code path with the
4507        // materialised rows.
4508        if let Some(select) = stmt.select_source.clone() {
4509            let select_result = self.exec_select_cancel(&select, CancelToken::none())?;
4510            let rows = match select_result {
4511                QueryResult::Rows { rows, .. } => rows,
4512                other => {
4513                    return Err(EngineError::Unsupported(alloc::format!(
4514                        "INSERT … SELECT: inner statement produced {other:?} instead of a row set"
4515                    )));
4516                }
4517            };
4518            let mut materialised: Vec<Vec<Expr>> = Vec::with_capacity(rows.len());
4519            for row in rows {
4520                let mut tuple: Vec<Expr> = Vec::with_capacity(row.values.len());
4521                for v in row.values {
4522                    tuple.push(value_to_literal_expr_permissive(v)?);
4523                }
4524                materialised.push(tuple);
4525            }
4526            let recurse = InsertStatement {
4527                table: stmt.table,
4528                columns: stmt.columns,
4529                rows: materialised,
4530                select_source: None,
4531                on_conflict: stmt.on_conflict,
4532                returning: stmt.returning,
4533            };
4534            return self.exec_insert(recurse);
4535        }
4536        // v7.9.21 — snapshot the clock fn pointer before the mut
4537        // borrow on the catalog opens; runtime DEFAULT eval needs
4538        // it inside the row hot loop.
4539        let clock = self.clock;
4540        // v7.12.4 — snapshot row-level triggers + their referenced
4541        // functions before the mut borrow on the catalog opens.
4542        // Cloned out so the row hot loop can fire them without
4543        // re-borrowing the catalog (which would conflict with
4544        // table.insert's mutable borrow).
4545        let before_insert_triggers = self.snapshot_row_triggers(&stmt.table, "INSERT", "BEFORE");
4546        let after_insert_triggers = self.snapshot_row_triggers(&stmt.table, "INSERT", "AFTER");
4547        let trigger_session_cfg: Option<alloc::string::String> = self
4548            .session_params
4549            .get("default_text_search_config")
4550            .cloned();
4551        let table = self
4552            .active_catalog_mut()
4553            .get_mut(&stmt.table)
4554            .ok_or_else(|| {
4555                EngineError::Storage(StorageError::TableNotFound {
4556                    name: stmt.table.clone(),
4557                })
4558            })?;
4559        // v3.1.5: clone the columns vector only (not the whole
4560        // TableSchema — saves one String alloc for the table name).
4561        // We need an owned snapshot because we'll call `table.insert`
4562        // (mutable borrow on `table`) inside the row loop while
4563        // reading schema fields.
4564        let column_meta: Vec<ColumnSchema> = table.schema().columns.clone();
4565        let schema_cols_len = column_meta.len();
4566        // Build a permutation `tuple_pos[c] = Some(j)` meaning schema
4567        // column `c` is filled from the `j`-th tuple slot; `None` means
4568        // "fill with NULL". Validated once and reused for every row.
4569        let tuple_pos: Option<Vec<Option<usize>>> = match &stmt.columns {
4570            None => None, // 1-1 mapping, fast path
4571            Some(cols) => {
4572                let mut map = alloc::vec![None; schema_cols_len];
4573                for (j, name) in cols.iter().enumerate() {
4574                    let idx = column_meta
4575                        .iter()
4576                        .position(|c| c.name == *name)
4577                        .ok_or_else(|| {
4578                            EngineError::Eval(EvalError::ColumnNotFound { name: name.clone() })
4579                        })?;
4580                    if map[idx].is_some() {
4581                        return Err(EngineError::Storage(StorageError::ArityMismatch {
4582                            expected: schema_cols_len,
4583                            actual: cols.len(),
4584                        }));
4585                    }
4586                    map[idx] = Some(j);
4587                }
4588                // Omitted columns must either be nullable, carry a
4589                // DEFAULT, or be AUTO_INCREMENT. Catch NOT NULL
4590                // omissions up front so the WAL stays clean.
4591                for (i, col) in column_meta.iter().enumerate() {
4592                    if map[i].is_none()
4593                        && !col.nullable
4594                        && col.default.is_none()
4595                        && col.runtime_default.is_none()
4596                        && !col.auto_increment
4597                    {
4598                        return Err(EngineError::Storage(StorageError::NullInNotNull {
4599                            column: col.name.clone(),
4600                        }));
4601                    }
4602                }
4603                Some(map)
4604            }
4605        };
4606        let expected_tuple_len = stmt.columns.as_ref().map_or(schema_cols_len, Vec::len);
4607        // v7.6.2 — snapshot this table's FK list before the
4608        // mutable-borrow window so we can run parent lookups
4609        // against the immutable catalog after parsing. Empty vec is
4610        // the no-FK fast path; clone cost is O(fks * arity) which
4611        // is < 100 ns for typical schemas.
4612        let fks = table.schema().foreign_keys.clone();
4613        let mut affected = 0usize;
4614        // Stage 1 — parse + AUTO_INC + coerce all rows under the
4615        // single mutable borrow.
4616        let mut all_values: Vec<Vec<Value>> = Vec::with_capacity(stmt.rows.len());
4617        for tuple in stmt.rows {
4618            if tuple.len() != expected_tuple_len {
4619                return Err(EngineError::Storage(StorageError::ArityMismatch {
4620                    expected: expected_tuple_len,
4621                    actual: tuple.len(),
4622                }));
4623            }
4624            // Fast path: no column-list permutation → tuple slot j
4625            // maps to schema column j. We can zip schema with tuple
4626            // and skip the `raw_tuple` staging allocation entirely.
4627            let values: Vec<Value> = if let Some(map) = &tuple_pos {
4628                // Permuted path: still need raw_tuple to index by `map[i]`.
4629                let raw_tuple: Vec<Value> = tuple
4630                    .into_iter()
4631                    .map(literal_expr_to_value)
4632                    .collect::<Result<_, _>>()?;
4633                let mut out = Vec::with_capacity(schema_cols_len);
4634                for (i, col) in column_meta.iter().enumerate() {
4635                    let mut raw = match map[i] {
4636                        Some(j) => raw_tuple[j].clone(),
4637                        None => resolve_column_default_free(col, clock)?,
4638                    };
4639                    if col.auto_increment && raw.is_null() {
4640                        let next = table.next_auto_value(i).ok_or_else(|| {
4641                            EngineError::Unsupported(alloc::format!(
4642                                "AUTO_INCREMENT applies to integer columns only (column `{}`)",
4643                                col.name
4644                            ))
4645                        })?;
4646                        raw = Value::BigInt(next);
4647                    }
4648                    out.push(coerce_value(raw, col.ty, &col.name, i)?);
4649                }
4650                out
4651            } else {
4652                // 1-1 mapping fast path: single Vec alloc, no raw_tuple.
4653                let mut out = Vec::with_capacity(schema_cols_len);
4654                for (i, (col, expr)) in column_meta.iter().zip(tuple).enumerate() {
4655                    let mut raw = literal_expr_to_value(expr)?;
4656                    if col.auto_increment && raw.is_null() {
4657                        let next = table.next_auto_value(i).ok_or_else(|| {
4658                            EngineError::Unsupported(alloc::format!(
4659                                "AUTO_INCREMENT applies to integer columns only (column `{}`)",
4660                                col.name
4661                            ))
4662                        })?;
4663                        raw = Value::BigInt(next);
4664                    }
4665                    out.push(coerce_value(raw, col.ty, &col.name, i)?);
4666                }
4667                out
4668            };
4669            all_values.push(values);
4670        }
4671        // Stage 2 — FK enforcement on the immutable catalog.
4672        // Non-lexical lifetimes release the mutable borrow on
4673        // `table` here since stage 1 was the last use. The
4674        // parent-table lookup runs before any row is committed.
4675        let uniqueness = table.schema().uniqueness_constraints.clone();
4676        let _ = table;
4677        if !fks.is_empty() {
4678            enforce_fk_inserts(self.active_catalog(), &stmt.table, &fks, &all_values)?;
4679        }
4680        // v7.13.0 — CHECK constraint enforcement (mailrs round-5 G3).
4681        enforce_check_constraints(self.active_catalog(), &stmt.table, &all_values)?;
4682        // v7.9.19 — composite UNIQUE / PRIMARY KEY enforcement.
4683        enforce_uniqueness_inserts(self.active_catalog(), &stmt.table, &uniqueness, &all_values)?;
4684        // v7.9.29 — CREATE UNIQUE INDEX [WHERE pred] enforcement.
4685        // Independent of table-level UniquenessConstraint (which
4686        // can't carry a predicate). Walks the table's indexes;
4687        // for each `is_unique` index, only rows whose
4688        // partial_predicate evaluates truthy are checked for
4689        // collision. mailrs K1.
4690        enforce_unique_index_inserts(self.active_catalog(), &stmt.table, &all_values)?;
4691        // v7.9.8 / v7.9.9 — ON CONFLICT handling.
4692        //   - `DO NOTHING` filters `all_values` to non-conflicting
4693        //     rows + drops within-batch duplicates.
4694        //   - `DO UPDATE SET …` ALSO filters, but for each
4695        //     conflicting row it queues an UPDATE on the existing
4696        //     row using the incoming row's values as `EXCLUDED.*`.
4697        let mut pending_updates: Vec<(usize, Vec<Value>)> = Vec::new();
4698        let mut skipped_count = 0usize;
4699        if let Some(clause) = &stmt.on_conflict {
4700            let conflict_cols = resolve_on_conflict_columns(
4701                self.active_catalog(),
4702                &stmt.table,
4703                clause.target_columns.as_slice(),
4704            )?;
4705            let mut kept: Vec<Vec<Value>> = Vec::with_capacity(all_values.len());
4706            let mut seen_keys: Vec<Vec<Value>> = Vec::new();
4707            for values in all_values {
4708                let key_tuple: Vec<&Value> = conflict_cols.iter().map(|&c| &values[c]).collect();
4709                // SQL spec: NULL in any conflict column means "no
4710                // conflict possible" (NULL ≠ NULL for uniqueness).
4711                let has_null_key = key_tuple.iter().any(|v| matches!(v, Value::Null));
4712                let collides_with_table = !has_null_key
4713                    && on_conflict_keys_exist(
4714                        self.active_catalog(),
4715                        &stmt.table,
4716                        &conflict_cols,
4717                        &key_tuple,
4718                    );
4719                let key_tuple_owned: Vec<Value> = key_tuple.iter().map(|v| (*v).clone()).collect();
4720                let collides_with_batch =
4721                    !has_null_key && seen_keys.iter().any(|k| k == &key_tuple_owned);
4722                let collides = collides_with_table || collides_with_batch;
4723                match (&clause.action, collides) {
4724                    (_, false) => {
4725                        seen_keys.push(key_tuple_owned);
4726                        kept.push(values);
4727                    }
4728                    (spg_sql::ast::OnConflictAction::Nothing, true) => {
4729                        skipped_count += 1;
4730                    }
4731                    (
4732                        spg_sql::ast::OnConflictAction::Update {
4733                            assignments,
4734                            where_,
4735                        },
4736                        true,
4737                    ) => {
4738                        if !collides_with_table {
4739                            skipped_count += 1;
4740                            continue;
4741                        }
4742                        let target_pos = lookup_row_position_by_keys(
4743                            self.active_catalog(),
4744                            &stmt.table,
4745                            &conflict_cols,
4746                            &key_tuple,
4747                        )
4748                        .ok_or_else(|| {
4749                            EngineError::Unsupported(
4750                                "ON CONFLICT DO UPDATE: conflict detected but row \
4751                                 position could not be resolved (cold-tier row?)"
4752                                    .into(),
4753                            )
4754                        })?;
4755                        let updated = apply_on_conflict_assignments(
4756                            self.active_catalog(),
4757                            &stmt.table,
4758                            target_pos,
4759                            &values,
4760                            assignments,
4761                            where_.as_ref(),
4762                        )?;
4763                        if let Some(new_row) = updated {
4764                            pending_updates.push((target_pos, new_row));
4765                        } else {
4766                            skipped_count += 1;
4767                        }
4768                    }
4769                }
4770            }
4771            all_values = kept;
4772        }
4773        // Stage 3 — insert all rows under a fresh mutable borrow.
4774        let table = self
4775            .active_catalog_mut()
4776            .get_mut(&stmt.table)
4777            .ok_or_else(|| {
4778                EngineError::Storage(StorageError::TableNotFound {
4779                    name: stmt.table.clone(),
4780                })
4781            })?;
4782        // v7.9.4 — keep RETURNING projection rows separate per
4783        // INSERT and per UPDATE branch so DO UPDATE pushes the new
4784        // post-update state, not the incoming-only values.
4785        let mut returning_rows: Vec<Vec<Value>> = Vec::new();
4786        // v7.12.7 — collect embedded SQL emitted by any trigger
4787        // fire across the row loop; engine drains the queue after
4788        // the table mut borrow drops.
4789        let mut deferred_embedded: Vec<triggers::DeferredEmbeddedStmt> = Vec::new();
4790        'rowloop: for values in all_values {
4791            let mut row = Row::new(values);
4792            // v7.12.4 — BEFORE INSERT row-level triggers. Each
4793            // trigger may rewrite NEW cells (e.g. populate
4794            // `search_vector := to_tsvector(...)`) and may return
4795            // NULL to skip the row entirely.
4796            for fd in &before_insert_triggers {
4797                let (outcome, deferred) = triggers::fire_row_trigger(
4798                    fd,
4799                    Some(row.clone()),
4800                    None,
4801                    &stmt.table,
4802                    &column_meta,
4803                    &[],
4804                    trigger_session_cfg.as_deref(),
4805                    false,
4806                )
4807                .map_err(|e| EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}"))))?;
4808                deferred_embedded.extend(deferred);
4809                match outcome {
4810                    triggers::TriggerOutcome::Row(r) => row = r,
4811                    triggers::TriggerOutcome::Skip => continue 'rowloop,
4812                }
4813            }
4814            if stmt.returning.is_some() {
4815                returning_rows.push(row.values.clone());
4816            }
4817            // v7.12.4 — clone for the AFTER trigger view; insert
4818            // moves the row into the table.
4819            let inserted = row.clone();
4820            table.insert(row)?;
4821            affected += 1;
4822            // v7.12.4 — AFTER INSERT row-level triggers fire post-
4823            // write. Return value is ignored (PG semantics); we
4824            // surface any error from the body up to the caller.
4825            for fd in &after_insert_triggers {
4826                let (_outcome, deferred) = triggers::fire_row_trigger(
4827                    fd,
4828                    Some(inserted.clone()),
4829                    None,
4830                    &stmt.table,
4831                    &column_meta,
4832                    &[],
4833                    trigger_session_cfg.as_deref(),
4834                    true,
4835                )
4836                .map_err(|e| EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}"))))?;
4837                deferred_embedded.extend(deferred);
4838            }
4839        }
4840        // v7.9.9 — apply ON CONFLICT DO UPDATE rewrites collected
4841        // in the conflict-resolution pass. update_row handles
4842        // index maintenance + body re-encoding.
4843        for (pos, new_row) in pending_updates {
4844            if stmt.returning.is_some() {
4845                returning_rows.push(new_row.clone());
4846            }
4847            table.update_row(pos, new_row)?;
4848            affected += 1;
4849        }
4850        let _ = skipped_count;
4851        // v7.12.7 — drop the table mut borrow and drain any
4852        // trigger-emitted embedded SQL queued during this INSERT.
4853        // The borrow has to release first because each deferred
4854        // stmt may UPDATE / INSERT / DELETE the same (or another)
4855        // table — including, in principle, this one.
4856        let _ = table;
4857        self.execute_deferred_trigger_stmts(deferred_embedded, CancelToken::none())?;
4858        // v7.9.4/v7.9.9 — RETURNING streams the rows that ended
4859        // up in the table after this statement (insert or
4860        // post-update on conflict).
4861        if let Some(items) = &stmt.returning {
4862            return self.build_returning_rows(&stmt.table, items, returning_rows);
4863        }
4864        // v6.2.1 — auto-analyze: track per-table modified-row
4865        // counter so the background sweep can decide when to
4866        // re-ANALYZE. Cheap path on the autocommit-wrap hot loop
4867        // — one BTreeMap entry update per INSERT batch.
4868        if !self.in_transaction() && affected > 0 {
4869            self.statistics
4870                .record_modifications(&stmt.table, affected as u64);
4871        }
4872        Ok(QueryResult::CommandOk {
4873            affected,
4874            modified_catalog: !self.in_transaction(),
4875        })
4876    }
4877
4878    /// v4.5: SELECT with cooperative cancellation. The token is
4879    /// honoured between UNION peers and inside the bare-SELECT row
4880    /// loop; HNSW kNN graph walks and the aggregate executor don't
4881    /// honour it yet (deferred — those paths bound their work
4882    /// internally by `LIMIT k` and `GROUP BY` cardinality).
4883    /// v6.10.2 — cold-tier time-travel scan. Resolves the segment
4884    /// by id, decodes each row body against the table's current
4885    /// schema, applies the SELECT's projection + optional WHERE +
4886    /// optional LIMIT, returns a `Rows` result. JOINs / aggregates
4887    /// / ORDER BY are unsupported on this path (STABILITY carve-
4888    /// out); operators wanting them should restore the segment
4889    /// into a regular table first.
4890    fn exec_select_as_of_segment(
4891        &self,
4892        stmt: &SelectStatement,
4893        from: &spg_sql::ast::FromClause,
4894        segment_id: u32,
4895    ) -> Result<QueryResult, EngineError> {
4896        // v6.10.2 scope: no joins, no aggregates, no ORDER BY,
4897        // no GROUP BY / HAVING / UNION / OFFSET / DISTINCT.
4898        if !from.joins.is_empty()
4899            || stmt.group_by.is_some()
4900            || stmt.having.is_some()
4901            || !stmt.unions.is_empty()
4902            || !stmt.order_by.is_empty()
4903            || stmt.offset.is_some()
4904            || stmt.distinct
4905            || aggregate::uses_aggregate(stmt)
4906        {
4907            return Err(EngineError::Unsupported(
4908                "AS OF SEGMENT supports SELECT projection + WHERE + LIMIT only \
4909                 (joins / aggregates / ORDER BY are STABILITY § \"Out of v6.10\")"
4910                    .into(),
4911            ));
4912        }
4913        let table = self
4914            .active_catalog()
4915            .get(&from.primary.name)
4916            .ok_or_else(|| StorageError::TableNotFound {
4917                name: from.primary.name.clone(),
4918            })?;
4919        let schema = table.schema().clone();
4920        let schema_cols = &schema.columns;
4921        let alias = from
4922            .primary
4923            .alias
4924            .as_deref()
4925            .unwrap_or(from.primary.name.as_str());
4926        let ctx = EvalContext::new(schema_cols, Some(alias));
4927        let seg = self
4928            .active_catalog()
4929            .cold_segment(segment_id)
4930            .ok_or_else(|| {
4931                EngineError::Unsupported(alloc::format!(
4932                    "AS OF SEGMENT: cold segment {segment_id} not registered"
4933                ))
4934            })?;
4935        let mut out_rows: Vec<Row> = Vec::new();
4936        let mut limit_remaining: Option<usize> =
4937            stmt.limit_literal().and_then(|n| usize::try_from(n).ok());
4938        for (_key, body) in seg.scan() {
4939            let (row, _consumed) =
4940                spg_storage::decode_row_body_dense(&body, &schema).map_err(EngineError::Storage)?;
4941            if let Some(where_expr) = &stmt.where_ {
4942                let cond = self.eval_expr_simple(where_expr, &row, &ctx)?;
4943                if !matches!(cond, Value::Bool(true)) {
4944                    continue;
4945                }
4946            }
4947            // Projection.
4948            let projected = self.project_row_simple(&row, &stmt.items, schema_cols, alias)?;
4949            out_rows.push(projected);
4950            if let Some(rem) = limit_remaining.as_mut() {
4951                if *rem == 0 {
4952                    out_rows.pop();
4953                    break;
4954                }
4955                *rem -= 1;
4956            }
4957        }
4958        // Output column schema: derive from SELECT items.
4959        let columns = self.derive_output_columns(&stmt.items, schema_cols, alias);
4960        Ok(QueryResult::Rows {
4961            columns,
4962            rows: out_rows,
4963        })
4964    }
4965
4966    /// v6.10.2 — simple-path WHERE eval that doesn't go through
4967    /// the correlated-subquery / Memoize machinery. AS OF SEGMENT
4968    /// scan paths predicate against a snapshot frozen segment, no
4969    /// cross-row state.
4970    fn eval_expr_simple(
4971        &self,
4972        expr: &Expr,
4973        row: &Row,
4974        ctx: &EvalContext,
4975    ) -> Result<Value, EngineError> {
4976        let cancel = CancelToken::none();
4977        self.eval_expr_with_correlated(expr, row, ctx, cancel, None)
4978    }
4979
4980    /// v7.9.4 — INSERT / UPDATE / DELETE RETURNING projector.
4981    /// Given the table name, the user-supplied projection items,
4982    /// and the mutated rows (post-insert / post-update values, or
4983    /// pre-delete snapshot), build a `QueryResult::Rows` whose
4984    /// schema describes the projected columns. Mailrs migration
4985    /// blocker #1.
4986    fn build_returning_rows(
4987        &self,
4988        table_name: &str,
4989        items: &[SelectItem],
4990        mutated_rows: Vec<Vec<Value>>,
4991    ) -> Result<QueryResult, EngineError> {
4992        let table = self.active_catalog().get(table_name).ok_or_else(|| {
4993            EngineError::Storage(StorageError::TableNotFound {
4994                name: table_name.into(),
4995            })
4996        })?;
4997        let schema_cols = table.schema().columns.clone();
4998        let columns = self.derive_output_columns(items, &schema_cols, table_name);
4999        let mut out_rows: Vec<Row> = Vec::with_capacity(mutated_rows.len());
5000        for values in mutated_rows {
5001            let row = Row::new(values);
5002            let projected = self.project_row_simple(&row, items, &schema_cols, table_name)?;
5003            out_rows.push(projected);
5004        }
5005        Ok(QueryResult::Rows {
5006            columns,
5007            rows: out_rows,
5008        })
5009    }
5010
5011    /// v6.10.2 — projection for AS OF SEGMENT. Resolves
5012    /// `SelectItem::Wildcard` to all schema columns and
5013    /// `SelectItem::Expr` via the regular eval path.
5014    fn project_row_simple(
5015        &self,
5016        row: &Row,
5017        items: &[SelectItem],
5018        schema_cols: &[ColumnSchema],
5019        alias: &str,
5020    ) -> Result<Row, EngineError> {
5021        let ctx = EvalContext::new(schema_cols, Some(alias));
5022        let cancel = CancelToken::none();
5023        let mut out_vals = Vec::new();
5024        for item in items {
5025            match item {
5026                SelectItem::Wildcard => {
5027                    out_vals.extend(row.values.iter().cloned());
5028                }
5029                SelectItem::Expr { expr, .. } => {
5030                    let v = self.eval_expr_with_correlated(expr, row, &ctx, cancel, None)?;
5031                    out_vals.push(v);
5032                }
5033            }
5034        }
5035        Ok(Row::new(out_vals))
5036    }
5037
5038    /// v6.10.2 — derive the output `ColumnSchema` list for an
5039    /// AS OF SEGMENT projection. Wildcards take the full schema;
5040    /// expressions take the alias if present or a synthetic
5041    /// `?column?` (PG convention) otherwise.
5042    fn derive_output_columns(
5043        &self,
5044        items: &[SelectItem],
5045        schema_cols: &[ColumnSchema],
5046        _alias: &str,
5047    ) -> Vec<ColumnSchema> {
5048        let mut out = Vec::new();
5049        for item in items {
5050            match item {
5051                SelectItem::Wildcard => {
5052                    out.extend(schema_cols.iter().cloned());
5053                }
5054                SelectItem::Expr { alias, .. } => {
5055                    let name = alias.clone().unwrap_or_else(|| "?column?".to_string());
5056                    // Default to Text; the caller's row values
5057                    // carry the actual type. v6.10.2 scope.
5058                    out.push(ColumnSchema::new(name, DataType::Text, true));
5059                }
5060            }
5061        }
5062        out
5063    }
5064
5065    fn exec_select_cancel(
5066        &self,
5067        stmt: &SelectStatement,
5068        cancel: CancelToken<'_>,
5069    ) -> Result<QueryResult, EngineError> {
5070        cancel.check()?;
5071        // v6.10.2 — cold-tier time-travel short-circuit. When the
5072        // primary TableRef carries `AS OF SEGMENT '<id>'`, run a
5073        // dedicated cold-segment scan instead of the regular
5074        // hot+index path. The scope is intentionally narrow for
5075        // v6.10.2 — bare `SELECT * FROM <t> AS OF SEGMENT 'id'`,
5076        // optionally with a single-column-equality WHERE. JOINs /
5077        // aggregates / ORDER BY / subqueries on top of a time-
5078        // travelled scan are STABILITY § "Out of v6.10".
5079        if let Some(from) = &stmt.from
5080            && let Some(seg_id) = from.primary.as_of_segment
5081        {
5082            return self.exec_select_as_of_segment(stmt, from, seg_id);
5083        }
5084        // v6.2.0 / v6.5.0 — virtual-table short-circuits. Detected
5085        // pre-CTE because they don't read from the catalog and
5086        // shouldn't participate in regular FROM resolution.
5087        if let Some(from) = &stmt.from
5088            && from.joins.is_empty()
5089            && stmt.where_.is_none()
5090            && stmt.group_by.is_none()
5091            && stmt.having.is_none()
5092            && stmt.unions.is_empty()
5093            && stmt.order_by.is_empty()
5094            && stmt.limit.is_none()
5095            && stmt.offset.is_none()
5096            && !stmt.distinct
5097            && stmt.items.iter().all(|i| matches!(i, SelectItem::Wildcard))
5098        {
5099            let lower = from.primary.name.to_ascii_lowercase();
5100            match lower.as_str() {
5101                "spg_statistic" => return Ok(self.exec_spg_statistic()),
5102                // v6.5.0 — observability v2 virtual tables.
5103                "spg_stat_replication" => return Ok(self.exec_spg_stat_replication()),
5104                "spg_stat_segment" => return Ok(self.exec_spg_stat_segment()),
5105                "spg_stat_query" => return Ok(self.exec_spg_stat_query()),
5106                "spg_stat_activity" => return Ok(self.exec_spg_stat_activity()),
5107                "spg_audit_chain" => return Ok(self.exec_spg_audit_chain()),
5108                "spg_audit_verify" => return Ok(self.exec_spg_audit_verify()),
5109                "spg_table_ddl" => return Ok(self.exec_spg_table_ddl()),
5110                "spg_role_ddl" => return Ok(self.exec_spg_role_ddl()),
5111                "spg_database_ddl" => return Ok(self.exec_spg_database_ddl()),
5112                _ => {}
5113            }
5114        }
5115        // v4.11: CTEs materialise into a temporary enriched catalog
5116        // *before* anything else — the body SELECT can then refer
5117        // to CTE names via the regular FROM-clause resolution.
5118        // Uncorrelated only: each CTE body runs once against the
5119        // current catalog, not against later CTEs' results (left-
5120        // to-right materialisation would relax this, but we keep
5121        // it simple for v4.11 MVP).
5122        if !stmt.ctes.is_empty() {
5123            return self.exec_with_ctes(stmt, cancel);
5124        }
5125        // v4.10: subqueries (uncorrelated) are resolved here, before
5126        // the executor sees the row loop. We clone the statement so
5127        // we can mutate without disturbing the caller's AST — most
5128        // queries pass through with no subquery nodes and the clone
5129        // is cheap; with subqueries the materialisation cost
5130        // dominates anyway.
5131        let mut stmt_owned;
5132        let stmt_ref: &SelectStatement = if expr_tree_has_subquery(stmt) {
5133            stmt_owned = stmt.clone();
5134            self.resolve_select_subqueries(&mut stmt_owned, cancel)?;
5135            &stmt_owned
5136        } else {
5137            stmt
5138        };
5139        if stmt_ref.unions.is_empty() {
5140            return self.exec_bare_select_cancel(stmt_ref, cancel);
5141        }
5142        // UNION path: clone-strip the head into a bare block (its own
5143        // DISTINCT and any inner ORDER BY are dropped by parser rule —
5144        // the wrapper SelectStatement carries them), execute, then chain
5145        // peers with left-associative dedup semantics.
5146        let mut head = stmt_ref.clone();
5147        head.unions = Vec::new();
5148        head.order_by = Vec::new();
5149        head.limit = None;
5150        let QueryResult::Rows { columns, mut rows } =
5151            self.exec_bare_select_cancel(&head, cancel)?
5152        else {
5153            unreachable!("bare SELECT cannot return CommandOk")
5154        };
5155        for (kind, peer) in &stmt_ref.unions {
5156            let QueryResult::Rows {
5157                columns: peer_cols,
5158                rows: peer_rows,
5159            } = self.exec_bare_select_cancel(peer, cancel)?
5160            else {
5161                unreachable!("bare SELECT cannot return CommandOk")
5162            };
5163            if peer_cols.len() != columns.len() {
5164                return Err(EngineError::Unsupported(alloc::format!(
5165                    "UNION arity mismatch: head has {} columns, peer has {}",
5166                    columns.len(),
5167                    peer_cols.len()
5168                )));
5169            }
5170            rows.extend(peer_rows);
5171            if matches!(kind, UnionKind::Distinct) {
5172                rows = dedup_rows(rows);
5173            }
5174        }
5175        // ORDER BY at the top of a UNION applies to the combined result.
5176        // Eval against the projected schema (NOT the source table).
5177        if !stmt.order_by.is_empty() {
5178            let synth_ctx = EvalContext::new(&columns, None);
5179            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
5180            let mut tagged: Vec<(Vec<f64>, Row)> = Vec::with_capacity(rows.len());
5181            for r in rows {
5182                let keys = build_order_keys(&stmt.order_by, &r, &synth_ctx)?;
5183                tagged.push((keys, r));
5184            }
5185            sort_by_keys(&mut tagged, &descs);
5186            rows = tagged.into_iter().map(|(_, r)| r).collect();
5187        }
5188        apply_offset_and_limit(&mut rows, stmt.offset_literal(), stmt.limit_literal());
5189        Ok(QueryResult::Rows { columns, rows })
5190    }
5191
5192    #[allow(clippy::too_many_lines)]
5193    #[allow(clippy::too_many_lines)] // huge match — splitting fragments the planner
5194    /// v7.11.7 — execute `SELECT … FROM unnest(expr) [AS] alias …`.
5195    /// Synthesises a single-column virtual table whose column type
5196    /// is TEXT and whose rows are the array elements. Routes
5197    /// through the regular projection / WHERE / ORDER BY / LIMIT
5198    /// machinery so set-returning UNNEST composes naturally with
5199    /// the rest of the SELECT surface.
5200    fn exec_select_unnest(
5201        &self,
5202        stmt: &SelectStatement,
5203        primary: &TableRef,
5204        cancel: CancelToken<'_>,
5205    ) -> Result<QueryResult, EngineError> {
5206        let expr = primary
5207            .unnest_expr
5208            .as_deref()
5209            .expect("caller guards unnest_expr.is_some()");
5210        // Evaluate the array expression once. Empty schema / empty
5211        // row — uncorrelated UNNEST cannot reference outer columns.
5212        let empty_schema: alloc::vec::Vec<ColumnSchema> = alloc::vec::Vec::new();
5213        let ctx = EvalContext::new(&empty_schema, None);
5214        let dummy_row = Row::new(alloc::vec::Vec::new());
5215        // v7.11.13 — unnest dispatches per array element type so
5216        // INT[] / BIGINT[] surface their PG types in projection.
5217        let (elem_dtype, rows): (DataType, alloc::vec::Vec<Row>) =
5218            match eval::eval_expr(expr, &dummy_row, &ctx).map_err(EngineError::Eval)? {
5219                Value::Null => (DataType::Text, alloc::vec::Vec::new()),
5220                Value::TextArray(items) => {
5221                    let rows = items
5222                        .into_iter()
5223                        .map(|item| {
5224                            Row::new(alloc::vec![match item {
5225                                Some(s) => Value::Text(s),
5226                                None => Value::Null,
5227                            }])
5228                        })
5229                        .collect();
5230                    (DataType::Text, rows)
5231                }
5232                Value::IntArray(items) => {
5233                    let rows = items
5234                        .into_iter()
5235                        .map(|item| {
5236                            Row::new(alloc::vec![match item {
5237                                Some(n) => Value::Int(n),
5238                                None => Value::Null,
5239                            }])
5240                        })
5241                        .collect();
5242                    (DataType::Int, rows)
5243                }
5244                Value::BigIntArray(items) => {
5245                    let rows = items
5246                        .into_iter()
5247                        .map(|item| {
5248                            Row::new(alloc::vec![match item {
5249                                Some(n) => Value::BigInt(n),
5250                                None => Value::Null,
5251                            }])
5252                        })
5253                        .collect();
5254                    (DataType::BigInt, rows)
5255                }
5256                other => {
5257                    return Err(EngineError::Unsupported(alloc::format!(
5258                        "unnest() expects an array argument, got {:?}",
5259                        other.data_type()
5260                    )));
5261                }
5262            };
5263        let alias = primary
5264            .alias
5265            .clone()
5266            .unwrap_or_else(|| "unnest".to_string());
5267        // v7.13.2 — mailrs round-6 S5. Honour PG-standard
5268        // `UNNEST(arr) AS p(col_name)` column-list aliasing: the
5269        // first entry overrides the projected column's name.
5270        // Without the column list, fall back to the table alias
5271        // (pre-v7.13.2 behaviour).
5272        let col_name = primary
5273            .unnest_column_aliases
5274            .first()
5275            .cloned()
5276            .unwrap_or_else(|| alias.clone());
5277        let col_schema = ColumnSchema::new(col_name, elem_dtype, true);
5278        let schema_cols = alloc::vec![col_schema.clone()];
5279        let scan_ctx = EvalContext::new(&schema_cols, Some(&alias));
5280        // Apply WHERE.
5281        let filtered: alloc::vec::Vec<Row> = if let Some(w) = &stmt.where_ {
5282            let mut out = alloc::vec::Vec::with_capacity(rows.len());
5283            for row in rows {
5284                cancel.check()?;
5285                let v = eval::eval_expr(w, &row, &scan_ctx).map_err(EngineError::Eval)?;
5286                if matches!(v, Value::Bool(true)) {
5287                    out.push(row);
5288                }
5289            }
5290            out
5291        } else {
5292            rows
5293        };
5294        // Projection.
5295        let projection = build_projection(&stmt.items, &schema_cols, &alias)?;
5296        let mut projected_rows: alloc::vec::Vec<Row> =
5297            alloc::vec::Vec::with_capacity(filtered.len());
5298        for row in &filtered {
5299            let mut vals = alloc::vec::Vec::with_capacity(projection.len());
5300            for p in &projection {
5301                vals.push(eval::eval_expr(&p.expr, row, &scan_ctx).map_err(EngineError::Eval)?);
5302            }
5303            projected_rows.push(Row::new(vals));
5304        }
5305        // ORDER BY / LIMIT — apply on the projected rows (cheap;
5306        // unnest result sets are small by design).
5307        let columns: alloc::vec::Vec<ColumnSchema> = projection
5308            .iter()
5309            .map(|p| ColumnSchema::new(p.output_name.clone(), p.ty, p.nullable))
5310            .collect();
5311        // Re-evaluate ORDER BY against the source schema (pre-projection
5312        // so col refs by name still resolve through `scan_ctx`).
5313        if !stmt.order_by.is_empty() {
5314            let mut indexed: alloc::vec::Vec<(usize, Vec<Value>)> = filtered
5315                .iter()
5316                .enumerate()
5317                .map(|(i, r)| -> Result<_, EngineError> {
5318                    let keys: Result<Vec<Value>, EngineError> = stmt
5319                        .order_by
5320                        .iter()
5321                        .map(|ob| {
5322                            eval::eval_expr(&ob.expr, r, &scan_ctx).map_err(EngineError::Eval)
5323                        })
5324                        .collect();
5325                    Ok((i, keys?))
5326                })
5327                .collect::<Result<_, _>>()?;
5328            indexed.sort_by(|a, b| {
5329                for (idx, (ka, kb)) in a.1.iter().zip(b.1.iter()).enumerate() {
5330                    let mut cmp = value_cmp(ka, kb);
5331                    if stmt.order_by[idx].desc {
5332                        cmp = cmp.reverse();
5333                    }
5334                    if cmp != core::cmp::Ordering::Equal {
5335                        return cmp;
5336                    }
5337                }
5338                core::cmp::Ordering::Equal
5339            });
5340            projected_rows = indexed
5341                .into_iter()
5342                .map(|(i, _)| projected_rows[i].clone())
5343                .collect();
5344        }
5345        // LIMIT / OFFSET — apply at the tail.
5346        if let Some(offset) = stmt.offset_literal() {
5347            let off = (offset as usize).min(projected_rows.len());
5348            projected_rows.drain(..off);
5349        }
5350        if let Some(limit) = stmt.limit_literal() {
5351            projected_rows.truncate(limit as usize);
5352        }
5353        Ok(QueryResult::Rows {
5354            columns,
5355            rows: projected_rows,
5356        })
5357    }
5358
5359    fn exec_bare_select_cancel(
5360        &self,
5361        stmt: &SelectStatement,
5362        cancel: CancelToken<'_>,
5363    ) -> Result<QueryResult, EngineError> {
5364        // v4.12: window-function path. When the projection contains
5365        // any `name(args) OVER (...)` we route to the dedicated
5366        // executor — partition + sort + per-row window value before
5367        // the regular projection.
5368        if select_has_window(stmt) {
5369            return self.exec_select_with_window(stmt, cancel);
5370        }
5371        // Constant SELECT (no FROM) — evaluate each item once against an
5372        // empty dummy row. Useful for `SELECT 1`, `SELECT coalesce(...)`,
5373        // `SELECT '7'::INT`. Column references will surface as
5374        // ColumnNotFound on eval since the schema is empty.
5375        let Some(from) = &stmt.from else {
5376            let empty_schema: Vec<ColumnSchema> = Vec::new();
5377            let ctx = self.ev_ctx(&empty_schema, None);
5378            let projection = build_projection(&stmt.items, &empty_schema, "")?;
5379            let dummy_row = Row::new(Vec::new());
5380            let mut values = Vec::with_capacity(projection.len());
5381            for p in &projection {
5382                values.push(eval::eval_expr(&p.expr, &dummy_row, &ctx)?);
5383            }
5384            let columns: Vec<ColumnSchema> = projection
5385                .into_iter()
5386                .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
5387                .collect();
5388            return Ok(QueryResult::Rows {
5389                columns,
5390                rows: alloc::vec![Row::new(values)],
5391            });
5392        };
5393        // Multi-table FROM (one or more joined peers) goes through the
5394        // nested-loop join executor. Single-table FROM stays on the
5395        // existing scan + index-seek path.
5396        if !from.joins.is_empty() {
5397            return self.exec_joined_select(stmt, from);
5398        }
5399        // v7.11.7 — `FROM unnest(<expr>) [AS] <alias>`. Synthesise a
5400        // single-column table at SELECT entry by evaluating the
5401        // expression once against the empty row (UNNEST is
5402        // uncorrelated in v7.11; correlated / LATERAL unnest is a
5403        // v7.12 carve-out). Build a virtual `Table` in a heap-only
5404        // catalog, then route to the regular scan path.
5405        if from.primary.unnest_expr.is_some() {
5406            return self.exec_select_unnest(stmt, &from.primary, cancel);
5407        }
5408        let primary = &from.primary;
5409        let table = self.active_catalog().get(&primary.name).ok_or_else(|| {
5410            StorageError::TableNotFound {
5411                name: primary.name.clone(),
5412            }
5413        })?;
5414        let schema_cols = &table.schema().columns;
5415        // The qualifier accepted on column refs is the alias (if any) else the
5416        // bare table name.
5417        let alias = primary.alias.as_deref().unwrap_or(primary.name.as_str());
5418        let ctx = self.ev_ctx(schema_cols, Some(alias));
5419
5420        // NSW kNN planner: `ORDER BY col <-> literal LIMIT k` with no
5421        // WHERE and an NSW index on `col` skips the full scan. The
5422        // walk returns rows already in ascending-distance order, so
5423        // ORDER BY / LIMIT are honoured implicitly.
5424        if let Some(nsw_rows) = try_nsw_knn(stmt, table, schema_cols, alias) {
5425            return materialise_in_order(stmt, table, schema_cols, alias, &nsw_rows);
5426        }
5427
5428        // Index seek: if WHERE is `col = literal` (or commuted) and the
5429        // referenced column has an index, dispatch each locator through
5430        // the catalog (hot tier → borrow, cold tier → page-read +
5431        // decode) and iterate just those rows. Otherwise fall back to a
5432        // full scan over the hot tier (cold-tier rows are only reached
5433        // via index seek in v5.1 — full table scans against cold-tier
5434        // data ship in v5.2 with the freezer's per-segment scan API).
5435        let indexed_rows: Option<Vec<Cow<'_, Row>>> = stmt.where_.as_ref().and_then(|w| {
5436            // BTree / col=literal seek first — covers the v7.11.3 multi-
5437            // column AND case and the leading-column equality lookup.
5438            try_index_seek(w, schema_cols, self.active_catalog(), table, alias)
5439                .or_else(|| {
5440                    // v7.12.3 — GIN-accelerated `WHERE col @@
5441                    // tsquery` when the column has a `USING gin`
5442                    // index. Returns an over-approximate candidate
5443                    // set; the WHERE re-eval loop below verifies
5444                    // the full `@@` predicate per row.
5445                    try_gin_seek(w, schema_cols, self.active_catalog(), table, alias, &ctx)
5446                })
5447                .or_else(|| {
5448                    // v7.15.0 — trigram-GIN-accelerated
5449                    // `WHERE col LIKE / ILIKE '<pat>'` when the
5450                    // column has a `gin_trgm_ops` GIN index.
5451                    // Over-approximate candidate set; the WHERE
5452                    // re-eval verifies the LIKE per row.
5453                    try_trgm_seek(w, schema_cols, table, alias)
5454                })
5455        });
5456
5457        // Aggregate path: filter rows first, then hand off to the
5458        // aggregate executor which does its own projection + ORDER BY.
5459        if aggregate::uses_aggregate(stmt) {
5460            let mut filtered: Vec<&Row> = Vec::new();
5461            // v6.2.6 — Memoize: per-query LRU cache for correlated
5462            // scalar subqueries. Fresh per row-loop entry so each
5463            // SELECT execution gets an isolated cache.
5464            let mut memo = memoize::MemoizeCache::new();
5465            if let Some(rows) = &indexed_rows {
5466                for cow in rows {
5467                    let row = cow.as_ref();
5468                    if let Some(where_expr) = &stmt.where_ {
5469                        let cond = self.eval_expr_with_correlated(
5470                            where_expr,
5471                            row,
5472                            &ctx,
5473                            cancel,
5474                            Some(&mut memo),
5475                        )?;
5476                        if !matches!(cond, Value::Bool(true)) {
5477                            continue;
5478                        }
5479                    }
5480                    filtered.push(row);
5481                }
5482            } else {
5483                for i in 0..table.row_count() {
5484                    let row = &table.rows()[i];
5485                    if let Some(where_expr) = &stmt.where_ {
5486                        let cond = self.eval_expr_with_correlated(
5487                            where_expr,
5488                            row,
5489                            &ctx,
5490                            cancel,
5491                            Some(&mut memo),
5492                        )?;
5493                        if !matches!(cond, Value::Bool(true)) {
5494                            continue;
5495                        }
5496                    }
5497                    filtered.push(row);
5498                }
5499            }
5500            let mut agg = aggregate::run(stmt, &filtered, schema_cols, Some(alias))?;
5501            apply_offset_and_limit(&mut agg.rows, stmt.offset_literal(), stmt.limit_literal());
5502            return Ok(QueryResult::Rows {
5503                columns: agg.columns,
5504                rows: agg.rows,
5505            });
5506        }
5507
5508        let projection = build_projection(&stmt.items, schema_cols, alias)?;
5509
5510        // Materialise the filter pass into `(order_key, projected_row)`
5511        // tuples. The order key is `None` when there's no ORDER BY clause.
5512        let mut tagged: Vec<(Vec<f64>, Row)> = Vec::new();
5513        // v6.2.6 — Memoize per-row WHERE eval shares one cache.
5514        let mut memo = memoize::MemoizeCache::new();
5515        // Inline the per-row work in a closure so the indexed and full-
5516        // scan branches share the body.
5517        let mut process_row = |row: &Row, loop_idx: usize| -> Result<(), EngineError> {
5518            if loop_idx.is_multiple_of(256) {
5519                cancel.check()?;
5520            }
5521            if let Some(where_expr) = &stmt.where_ {
5522                let cond =
5523                    self.eval_expr_with_correlated(where_expr, row, &ctx, cancel, Some(&mut memo))?;
5524                if !matches!(cond, Value::Bool(true)) {
5525                    return Ok(());
5526                }
5527            }
5528            let mut values = Vec::with_capacity(projection.len());
5529            for p in &projection {
5530                values.push(eval::eval_expr(&p.expr, row, &ctx)?);
5531            }
5532            let order_keys = if stmt.order_by.is_empty() {
5533                Vec::new()
5534            } else {
5535                build_order_keys(&stmt.order_by, row, &ctx)?
5536            };
5537            tagged.push((order_keys, Row::new(values)));
5538            Ok(())
5539        };
5540        if let Some(rows) = &indexed_rows {
5541            for (loop_idx, cow) in rows.iter().enumerate() {
5542                process_row(cow.as_ref(), loop_idx)?;
5543            }
5544        } else {
5545            for i in 0..table.row_count() {
5546                process_row(&table.rows()[i], i)?;
5547            }
5548        }
5549
5550        if !stmt.order_by.is_empty() {
5551            // Partial-sort fast path: when LIMIT is small relative to
5552            // the row count, select_nth_unstable + sort just the
5553            // prefix is O(n + k log k) instead of O(n log n). DISTINCT
5554            // requires the full sort because de-dup happens after.
5555            let keep = if stmt.distinct {
5556                None
5557            } else {
5558                stmt.limit_literal()
5559                    .map(|l| l as usize + stmt.offset_literal().map_or(0, |o| o as usize))
5560            };
5561            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
5562            partial_sort_tagged(&mut tagged, keep, &descs);
5563        }
5564
5565        let mut output_rows: Vec<Row> = tagged.into_iter().map(|(_, r)| r).collect();
5566        if stmt.distinct {
5567            output_rows = dedup_rows(output_rows);
5568        }
5569        apply_offset_and_limit(
5570            &mut output_rows,
5571            stmt.offset_literal(),
5572            stmt.limit_literal(),
5573        );
5574
5575        let columns: Vec<ColumnSchema> = projection
5576            .into_iter()
5577            .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
5578            .collect();
5579
5580        Ok(QueryResult::Rows {
5581            columns,
5582            rows: output_rows,
5583        })
5584    }
5585
5586    /// Multi-table SELECT executor (one or more JOIN peers).
5587    ///
5588    /// v1.10 builds the joined row set up-front via nested-loop joins,
5589    /// then runs WHERE + projection + ORDER BY against the combined
5590    /// rows. No index seek. Aggregates and DISTINCT still work because
5591    /// the executor delegates projection through the same shared paths.
5592    #[allow(clippy::too_many_lines)]
5593    /// v7.13.2 — mailrs round-6 S5. Resolve a TableRef into an
5594    /// owned (rows, schema) pair. Catalog tables clone their hot
5595    /// rows + schema; UNNEST table refs evaluate their array
5596    /// expression once and synthesise a single-column row set
5597    /// using the same dispatch as `exec_select_unnest`. Used by
5598    /// the joined-select path so UNNEST can appear in any FROM
5599    /// position, not just as the primary.
5600    fn materialise_table_ref(
5601        &self,
5602        tref: &TableRef,
5603    ) -> Result<(Vec<Row>, Vec<ColumnSchema>), EngineError> {
5604        if let Some(expr) = tref.unnest_expr.as_deref() {
5605            let empty_schema: Vec<ColumnSchema> = Vec::new();
5606            let ctx = EvalContext::new(&empty_schema, None);
5607            let dummy_row = Row::new(Vec::new());
5608            let (elem_dtype, rows) =
5609                match eval::eval_expr(expr, &dummy_row, &ctx).map_err(EngineError::Eval)? {
5610                    Value::Null => (DataType::Text, Vec::new()),
5611                    Value::TextArray(items) => (
5612                        DataType::Text,
5613                        items
5614                            .into_iter()
5615                            .map(|item| {
5616                                Row::new(alloc::vec![match item {
5617                                    Some(s) => Value::Text(s),
5618                                    None => Value::Null,
5619                                }])
5620                            })
5621                            .collect(),
5622                    ),
5623                    Value::IntArray(items) => (
5624                        DataType::Int,
5625                        items
5626                            .into_iter()
5627                            .map(|item| {
5628                                Row::new(alloc::vec![match item {
5629                                    Some(n) => Value::Int(n),
5630                                    None => Value::Null,
5631                                }])
5632                            })
5633                            .collect(),
5634                    ),
5635                    Value::BigIntArray(items) => (
5636                        DataType::BigInt,
5637                        items
5638                            .into_iter()
5639                            .map(|item| {
5640                                Row::new(alloc::vec![match item {
5641                                    Some(n) => Value::BigInt(n),
5642                                    None => Value::Null,
5643                                }])
5644                            })
5645                            .collect(),
5646                    ),
5647                    other => {
5648                        return Err(EngineError::Unsupported(alloc::format!(
5649                            "unnest() expects an array argument, got {:?}",
5650                            other.data_type()
5651                        )));
5652                    }
5653                };
5654            let alias = tref.alias.clone().unwrap_or_else(|| "unnest".to_string());
5655            let col_name = tref
5656                .unnest_column_aliases
5657                .first()
5658                .cloned()
5659                .unwrap_or(alias);
5660            return Ok((rows, alloc::vec![ColumnSchema::new(col_name, elem_dtype, true)]));
5661        }
5662        let table = self
5663            .active_catalog()
5664            .get(&tref.name)
5665            .ok_or_else(|| StorageError::TableNotFound {
5666                name: tref.name.clone(),
5667            })?;
5668        let rows: Vec<Row> = table.rows().iter().cloned().collect();
5669        let cols = table.schema().columns.clone();
5670        Ok((rows, cols))
5671    }
5672
5673    fn exec_joined_select(
5674        &self,
5675        stmt: &SelectStatement,
5676        from: &FromClause,
5677    ) -> Result<QueryResult, EngineError> {
5678        // v7.13.2 — mailrs round-6 S5. UNNEST peers materialise
5679        // into virtual (rows, schema) sources alongside catalog
5680        // tables, so `FROM t, UNNEST(arr) AS p(col)` works in
5681        // any join-list position. The lookup helper handles both
5682        // shapes uniformly.
5683        let (primary_rows, primary_cols) = self.materialise_table_ref(&from.primary)?;
5684        let primary_alias = from
5685            .primary
5686            .alias
5687            .as_deref()
5688            .unwrap_or(from.primary.name.as_str())
5689            .to_string();
5690        // Owned (rows, schema) per peer — borrows from the catalog
5691        // would not survive UNNEST-side materialisation.
5692        let mut joined: Vec<(Vec<Row>, Vec<ColumnSchema>, String, JoinKind, Option<&Expr>)> =
5693            Vec::new();
5694        for j in &from.joins {
5695            let (rows, cols) = self.materialise_table_ref(&j.table)?;
5696            let a = j
5697                .table
5698                .alias
5699                .as_deref()
5700                .unwrap_or(j.table.name.as_str())
5701                .to_string();
5702            joined.push((rows, cols, a, j.kind, j.on.as_ref()));
5703        }
5704
5705        // Build the combined schema: composite "alias.col" names so the
5706        // qualified-column resolver can find anything by exact match.
5707        let mut combined_schema: Vec<ColumnSchema> = Vec::new();
5708        for col in &primary_cols {
5709            combined_schema.push(ColumnSchema::new(
5710                alloc::format!("{primary_alias}.{}", col.name),
5711                col.ty,
5712                col.nullable,
5713            ));
5714        }
5715        for (_, cols, a, _, _) in &joined {
5716            for col in cols {
5717                combined_schema.push(ColumnSchema::new(
5718                    alloc::format!("{a}.{}", col.name),
5719                    col.ty,
5720                    col.nullable,
5721                ));
5722            }
5723        }
5724        let ctx = EvalContext::new(&combined_schema, None);
5725
5726        // Nested-loop join.
5727        let mut working: Vec<Row> = primary_rows;
5728        let mut produced_len = primary_cols.len();
5729        for (rrows, rcols, _, kind, on) in &joined {
5730            let right_arity = rcols.len();
5731            let mut next: Vec<Row> = Vec::new();
5732            for left in &working {
5733                let mut left_matched = false;
5734                for right in rrows {
5735                    let mut combined_vals = left.values.clone();
5736                    combined_vals.extend(right.values.iter().cloned());
5737                    // Pad combined to the eventual full width so the
5738                    // partial schema still matches positions used by ON.
5739                    let combined = Row::new(combined_vals);
5740                    let keep = if let Some(on_expr) = on {
5741                        let cond = eval::eval_expr(on_expr, &combined, &ctx)?;
5742                        matches!(cond, Value::Bool(true))
5743                    } else {
5744                        // CROSS / comma-list: every pair survives.
5745                        true
5746                    };
5747                    if keep {
5748                        next.push(combined);
5749                        left_matched = true;
5750                    }
5751                }
5752                if !left_matched && matches!(kind, JoinKind::Left) {
5753                    // LEFT OUTER JOIN: emit the left row with NULLs on
5754                    // the right side when no peer matched.
5755                    let mut combined_vals = left.values.clone();
5756                    for _ in 0..right_arity {
5757                        combined_vals.push(Value::Null);
5758                    }
5759                    next.push(Row::new(combined_vals));
5760                }
5761            }
5762            working = next;
5763            produced_len += right_arity;
5764            debug_assert!(produced_len <= combined_schema.len());
5765        }
5766
5767        // WHERE filter against combined rows.
5768        let mut filtered: Vec<Row> = Vec::new();
5769        for row in working {
5770            if let Some(where_expr) = &stmt.where_ {
5771                let cond = eval::eval_expr(where_expr, &row, &ctx)?;
5772                if !matches!(cond, Value::Bool(true)) {
5773                    continue;
5774                }
5775            }
5776            filtered.push(row);
5777        }
5778
5779        // Aggregate path: handle GROUP BY / aggregate calls over the
5780        // joined+filtered rows.
5781        if aggregate::uses_aggregate(stmt) {
5782            let refs: Vec<&Row> = filtered.iter().collect();
5783            let mut agg = aggregate::run(stmt, &refs, &combined_schema, None)?;
5784            apply_offset_and_limit(&mut agg.rows, stmt.offset_literal(), stmt.limit_literal());
5785            return Ok(QueryResult::Rows {
5786                columns: agg.columns,
5787                rows: agg.rows,
5788            });
5789        }
5790
5791        let projection = build_projection(&stmt.items, &combined_schema, "")?;
5792        let mut tagged: Vec<(Vec<f64>, Row)> = Vec::new();
5793        for row in &filtered {
5794            let mut values = Vec::with_capacity(projection.len());
5795            for p in &projection {
5796                values.push(eval::eval_expr(&p.expr, row, &ctx)?);
5797            }
5798            let order_keys = if stmt.order_by.is_empty() {
5799                Vec::new()
5800            } else {
5801                build_order_keys(&stmt.order_by, row, &ctx)?
5802            };
5803            tagged.push((order_keys, Row::new(values)));
5804        }
5805        if !stmt.order_by.is_empty() {
5806            let keep = if stmt.distinct {
5807                None
5808            } else {
5809                stmt.limit_literal()
5810                    .map(|l| l as usize + stmt.offset_literal().map_or(0, |o| o as usize))
5811            };
5812            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
5813            partial_sort_tagged(&mut tagged, keep, &descs);
5814        }
5815        let mut output_rows: Vec<Row> = tagged.into_iter().map(|(_, r)| r).collect();
5816        if stmt.distinct {
5817            output_rows = dedup_rows(output_rows);
5818        }
5819        apply_offset_and_limit(
5820            &mut output_rows,
5821            stmt.offset_literal(),
5822            stmt.limit_literal(),
5823        );
5824        let columns: Vec<ColumnSchema> = projection
5825            .into_iter()
5826            .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
5827            .collect();
5828        Ok(QueryResult::Rows {
5829            columns,
5830            rows: output_rows,
5831        })
5832    }
5833}
5834
/// One row-producing projection: an expression to evaluate, the resulting
/// column's user-visible name, its inferred type, and nullability.
///
/// The executors evaluate `expr` once per surviving row and turn the other
/// three fields into the result's `ColumnSchema`.
#[derive(Debug, Clone)]
struct ProjectedItem {
    /// Expression evaluated against each source row to produce the value.
    expr: Expr,
    /// Name reported for this column in the result set.
    output_name: String,
    /// Data type reported for this column in the result set.
    ty: DataType,
    /// Nullability flag reported for this column in the result set.
    nullable: bool,
}
5844
5845/// Dedupe a row set, preserving first-seen order. `Row`'s `PartialEq` is
5846/// structural (`Vec<Value>` ⇒ pairwise `Value` equality), which gives SQL
5847/// `NULL = NULL → TRUE` and `NaN = NaN → FALSE`. The first agrees with
5848/// the spec's "two NULLs are not distinct"; the second is a tolerated
5849/// quirk for v1 (no NaN literals are reachable from the SQL surface).
5850fn dedup_rows(rows: Vec<Row>) -> Vec<Row> {
5851    let mut out: Vec<Row> = Vec::with_capacity(rows.len());
5852    for r in rows {
5853        if !out.iter().any(|seen| seen == &r) {
5854            out.push(r);
5855        }
5856    }
5857    out
5858}
5859
5860/// Coerce a `Value` to an `f64` sort key for ORDER BY. Numbers map directly;
5861/// NULL sorts last (treated as `+∞`); booleans are 0.0 / 1.0; text uses lex
5862/// order via the byte values; vectors are not sortable.
5863fn value_to_order_key(v: &Value) -> Result<f64, EngineError> {
5864    match v {
5865        Value::Null => Ok(f64::INFINITY),
5866        Value::SmallInt(n) => Ok(f64::from(*n)),
5867        Value::Int(n) => Ok(f64::from(*n)),
5868        Value::Date(d) => Ok(f64::from(*d)),
5869        #[allow(clippy::cast_precision_loss)]
5870        Value::Timestamp(t) => Ok(*t as f64),
5871        #[allow(clippy::cast_precision_loss)]
5872        Value::Numeric { scaled, scale } => {
5873            // Scaled integer / 10^scale, computed via f64 for sort
5874            // ordering only. Precision losses here only matter for
5875            // ORDER BY tie-breaks well past 15 significant digits.
5876            // `f64::powi` lives in std; we hand-roll the loop so the
5877            // no_std engine crate doesn't need it.
5878            let mut divisor = 1.0_f64;
5879            for _ in 0..*scale {
5880                divisor *= 10.0;
5881            }
5882            Ok((*scaled as f64) / divisor)
5883        }
5884        #[allow(clippy::cast_precision_loss)]
5885        Value::BigInt(n) => Ok(*n as f64),
5886        Value::Float(x) => Ok(*x),
5887        Value::Bool(b) => Ok(if *b { 1.0 } else { 0.0 }),
5888        Value::Text(s) => {
5889            // Lex order by codepoints — good enough for ORDER BY name.
5890            // Map first 8 bytes packed into u64 as a coarse key; ties fall to
5891            // partial_cmp Equal. v1.x can swap in a real string comparator.
5892            let mut key: u64 = 0;
5893            for &b in s.as_bytes().iter().take(8) {
5894                key = (key << 8) | u64::from(b);
5895            }
5896            #[allow(clippy::cast_precision_loss)]
5897            Ok(key as f64)
5898        }
5899        Value::Vector(_) | Value::Sq8Vector(_) | Value::HalfVector(_) => {
5900            Err(EngineError::Unsupported(
5901                "ORDER BY of a raw vector column is not meaningful — use `<->`".into(),
5902            ))
5903        }
5904        Value::Interval { .. } => Err(EngineError::Unsupported(
5905            "ORDER BY of an INTERVAL is not supported in v2.11 \
5906             (months vs micros has no single canonical ordering)"
5907                .into(),
5908        )),
5909        Value::Json(_) => Err(EngineError::Unsupported(
5910            "ORDER BY of a JSON value is not supported — cast the document to text first".into(),
5911        )),
5912        // v7.5.0 — Value is #[non_exhaustive]; future variants need
5913        // an explicit ORDER BY mapping. Surface as Unsupported until
5914        // engine support is added.
5915        _ => Err(EngineError::Unsupported(
5916            "ORDER BY of this value type is not supported".into(),
5917        )),
5918    }
5919}
5920
5921/// Try to plan a WHERE clause as an equality lookup against an existing
5922/// index. Returns the candidate row indices on success; `None` means the
5923/// caller should fall back to a full scan.
5924///
5925/// v0.8 recognises a single top-level `col = literal` (in either operand
5926/// order). AND chains and range scans land in later milestones.
5927/// Look for `ORDER BY col <dist-op> literal LIMIT k` against an
5928/// NSW-indexed vector column. Recognised distance ops: `<->` (L2),
5929/// `<#>` (inner product), `<=>` (cosine). When a WHERE clause is
5930/// present, the planner does an "over-fetch and filter" pass — it
5931/// asks the graph for `k * over_fetch` candidates, evaluates WHERE
5932/// against each, and trims back to `k`. Returns the row indices in
5933/// ascending-distance order when the plan applies.
5934fn try_nsw_knn(
5935    stmt: &SelectStatement,
5936    table: &Table,
5937    schema_cols: &[ColumnSchema],
5938    table_alias: &str,
5939) -> Option<Vec<usize>> {
5940    if stmt.distinct {
5941        return None;
5942    }
5943    let limit = usize::try_from(stmt.limit_literal()?).ok()?;
5944    if limit == 0 {
5945        return None;
5946    }
5947    // v6.4.0 — NSW kNN dispatch needs a single ORDER BY key on the
5948    // distance metric. Multi-key ORDER BY falls through to the
5949    // generic sort path.
5950    if stmt.order_by.len() != 1 {
5951        return None;
5952    }
5953    let order = &stmt.order_by[0];
5954    // NSW kNN returns rows ascending by distance — DESC inverts the
5955    // natural order, so the planner can't handle it without a sort
5956    // pass. Fall back to the generic ORDER BY path.
5957    if order.desc {
5958        return None;
5959    }
5960    let Expr::Binary { lhs, op, rhs } = &order.expr else {
5961        return None;
5962    };
5963    let metric = match op {
5964        BinOp::L2Distance => spg_storage::NswMetric::L2,
5965        BinOp::InnerProduct => spg_storage::NswMetric::InnerProduct,
5966        BinOp::CosineDistance => spg_storage::NswMetric::Cosine,
5967        _ => return None,
5968    };
5969    // Accept both `col <op> literal` and `literal <op> col`.
5970    let ((Expr::Column(col), literal) | (literal, Expr::Column(col))) =
5971        (lhs.as_ref(), rhs.as_ref())
5972    else {
5973        return None;
5974    };
5975    if let Some(q) = &col.qualifier
5976        && q != table_alias
5977    {
5978        return None;
5979    }
5980    let col_pos = schema_cols.iter().position(|s| s.name == col.name)?;
5981    let query = literal_to_vector(literal)?;
5982    let idx = spg_storage::nsw_index_on(table, col_pos)?;
5983    if let Some(where_expr) = &stmt.where_ {
5984        // Over-fetch and filter. The factor (10×) is a heuristic that
5985        // covers typical selectivity for the corpus tests; v2.x will
5986        // make it configurable.
5987        let over_fetch = limit.saturating_mul(10).max(NSW_OVER_FETCH_FLOOR);
5988        let candidates = spg_storage::nsw_query(table, &idx.name, &query, over_fetch, metric);
5989        let ctx = EvalContext::new(schema_cols, Some(table_alias));
5990        let mut kept: Vec<usize> = Vec::with_capacity(limit);
5991        for i in candidates {
5992            let row = &table.rows()[i];
5993            let cond = eval::eval_expr(where_expr, row, &ctx).ok()?;
5994            if matches!(cond, Value::Bool(true)) {
5995                kept.push(i);
5996                if kept.len() >= limit {
5997                    break;
5998                }
5999            }
6000        }
6001        Some(kept)
6002    } else {
6003        Some(spg_storage::nsw_query(
6004            table, &idx.name, &query, limit, metric,
6005        ))
6006    }
6007}
6008
/// Lower bound on the over-fetch pool when WHERE is present — even
/// for tiny `LIMIT 1` queries we keep enough candidates to absorb a
/// few WHERE rejections. `try_nsw_knn` applies it with `.max(...)` to
/// the candidate count it requests from the NSW graph.
const NSW_OVER_FETCH_FLOOR: usize = 32;
6013
6014/// Pull a `Vec<f32>` out of a literal-or-cast expression. Returns
6015/// `None` for anything we can't fold at plan time.
6016fn literal_to_vector(e: &Expr) -> Option<Vec<f32>> {
6017    match e {
6018        Expr::Literal(Literal::Vector(v)) => Some(v.clone()),
6019        Expr::Cast { expr, .. } => literal_to_vector(expr),
6020        _ => None,
6021    }
6022}
6023
6024/// Materialise rows in a planner-supplied order (used by the NSW path)
6025/// without re-running ORDER BY. The projection + LIMIT slot mirror the
6026/// equivalent block in `exec_bare_select`.
6027fn materialise_in_order(
6028    stmt: &SelectStatement,
6029    table: &Table,
6030    schema_cols: &[ColumnSchema],
6031    table_alias: &str,
6032    ordered_rows: &[usize],
6033) -> Result<QueryResult, EngineError> {
6034    let ctx = EvalContext::new(schema_cols, Some(table_alias));
6035    let projection = build_projection(&stmt.items, schema_cols, table_alias)?;
6036    let mut output_rows: Vec<Row> = Vec::with_capacity(ordered_rows.len());
6037    for &i in ordered_rows {
6038        let row = &table.rows()[i];
6039        let mut values = Vec::with_capacity(projection.len());
6040        for p in &projection {
6041            values.push(eval::eval_expr(&p.expr, row, &ctx)?);
6042        }
6043        output_rows.push(Row::new(values));
6044    }
6045    apply_offset_and_limit(
6046        &mut output_rows,
6047        stmt.offset_literal(),
6048        stmt.limit_literal(),
6049    );
6050    let columns: Vec<ColumnSchema> = projection
6051        .into_iter()
6052        .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
6053        .collect();
6054    Ok(QueryResult::Rows {
6055        columns,
6056        rows: output_rows,
6057    })
6058}
6059
6060fn try_index_seek<'a>(
6061    where_expr: &Expr,
6062    schema_cols: &[ColumnSchema],
6063    catalog: &'a Catalog,
6064    table: &'a Table,
6065    table_alias: &str,
6066) -> Option<Vec<Cow<'a, Row>>> {
6067    // v7.11.3 — recurse through top-level `AND` so a PG-style
6068    // composite predicate like `WHERE id = 1 AND created_at > $1`
6069    // still hits the index on `id`. The caller re-applies the
6070    // full WHERE expression to each returned row, so dropping the
6071    // residual conjuncts here is correct — the index just narrows
6072    // the candidate set.
6073    if let Expr::Binary {
6074        lhs,
6075        op: BinOp::And,
6076        rhs,
6077    } = where_expr
6078    {
6079        // Try LHS first (typical convention: leading equality on
6080        // the indexed column comes first in user-written SQL).
6081        if let Some(rows) = try_index_seek(lhs, schema_cols, catalog, table, table_alias) {
6082            return Some(rows);
6083        }
6084        return try_index_seek(rhs, schema_cols, catalog, table, table_alias);
6085    }
6086    let Expr::Binary {
6087        lhs,
6088        op: BinOp::Eq,
6089        rhs,
6090    } = where_expr
6091    else {
6092        return None;
6093    };
6094    let (col_pos, value) = resolve_col_literal_pair(lhs, rhs, schema_cols, table_alias)
6095        .or_else(|| resolve_col_literal_pair(rhs, lhs, schema_cols, table_alias))?;
6096    let idx = table.index_on(col_pos)?;
6097    let key = IndexKey::from_value(&value)?;
6098    let locators = idx.lookup_eq(&key);
6099    let table_name = table.schema().name.as_str();
6100    // v5.1: each locator dispatches to either the hot tier (zero-
6101    // copy borrow of `table.rows()[i]`) or a cold-tier segment
6102    // (one page read + dense row decode, ~µs scale). Cold rows are
6103    // returned as `Cow::Owned` so the caller's `&Row` iteration
6104    // doesn't see a tier distinction; pre-freezer (no cold
6105    // segments loaded) every locator is `Hot` and every entry is
6106    // `Cow::Borrowed` — identical cost to the pre-v5.1 path.
6107    let mut out: Vec<Cow<'a, Row>> = Vec::with_capacity(locators.len());
6108    for loc in locators {
6109        match *loc {
6110            spg_storage::RowLocator::Hot(i) => {
6111                if let Some(row) = table.rows().get(i) {
6112                    out.push(Cow::Borrowed(row));
6113                }
6114            }
6115            spg_storage::RowLocator::Cold { segment_id, .. } => {
6116                if let Some(row) = catalog.resolve_cold_locator(table_name, segment_id, &key) {
6117                    out.push(Cow::Owned(row));
6118                }
6119            }
6120        }
6121    }
6122    Some(out)
6123}
6124
6125/// v7.12.3 — GIN-accelerated candidate seek for `WHERE col @@ <ts_query>`.
6126///
6127/// Recurses through top-level `AND` like [`try_index_seek`] so a
6128/// composite predicate `WHERE search_vector @@ q AND id > $1` still
6129/// hits the GIN index on `search_vector` — the caller re-applies the
6130/// full WHERE expression to each returned candidate, so dropping the
6131/// `id > $1` residual here stays semantically correct.
6132///
6133/// Returns `None` when:
6134///   - no leaf is a `col @@ <rhs>` shape on a GIN-indexed column;
6135///   - the RHS can't be const-evaluated to a `Value::TsQuery`
6136///     (typically because it references row columns);
6137///   - the resolved `TsQuery` uses query shapes the MVP doesn't
6138///     accelerate (`Not`, `Phrase` — those fall through to full scan).
6139///
6140/// On `Some(rows)` the caller iterates only `rows` and re-evaluates
6141/// the full `@@` predicate per row, so an over-approximate candidate
6142/// set is safe.
6143fn try_gin_seek<'a>(
6144    where_expr: &Expr,
6145    schema_cols: &[ColumnSchema],
6146    catalog: &'a Catalog,
6147    table: &'a Table,
6148    table_alias: &str,
6149    ctx: &eval::EvalContext<'_>,
6150) -> Option<Vec<Cow<'a, Row>>> {
6151    if let Expr::Binary {
6152        lhs,
6153        op: BinOp::And,
6154        rhs,
6155    } = where_expr
6156    {
6157        if let Some(rows) = try_gin_seek(lhs, schema_cols, catalog, table, table_alias, ctx) {
6158            return Some(rows);
6159        }
6160        return try_gin_seek(rhs, schema_cols, catalog, table, table_alias, ctx);
6161    }
6162    let Expr::Binary {
6163        lhs,
6164        op: BinOp::TsMatch,
6165        rhs,
6166    } = where_expr
6167    else {
6168        return None;
6169    };
6170    // Either side can be the column; pgvector idiom (`vec @@ q`)
6171    // hits the first arm, FROM-clause-derived (`plainto_tsquery($1)
6172    // q ... WHERE search_vector @@ q`) the same. CROSS JOIN derived
6173    // tables resolve `q` to a Column too.
6174    let (col_pos, query) = resolve_gin_col_query(lhs, rhs, schema_cols, table_alias, ctx)
6175        .or_else(|| resolve_gin_col_query(rhs, lhs, schema_cols, table_alias, ctx))?;
6176    let idx = table
6177        .indices()
6178        .iter()
6179        .find(|i| i.column_position == col_pos && i.is_gin())?;
6180    let candidates = gin_query_candidates(idx, &query)?;
6181    let _ = catalog; // cold-tier row resolution unused in MVP; see below.
6182    let mut out: Vec<Cow<'a, Row>> = Vec::with_capacity(candidates.len());
6183    for loc in candidates {
6184        match loc {
6185            spg_storage::RowLocator::Hot(i) => {
6186                if let Some(row) = table.rows().get(i) {
6187                    out.push(Cow::Borrowed(row));
6188                }
6189            }
6190            // GIN cold-tier rows in the MVP: skipped, matching the
6191            // full-scan `@@` path which itself only iterates
6192            // `table.rows()` (hot tier). When v7.13+ adds cold-tier
6193            // scan-time materialisation for `@@`, the parallel
6194            // resolution lands here; until then both paths see the
6195            // same hot-only candidate set so correctness is preserved.
6196            spg_storage::RowLocator::Cold { .. } => {}
6197        }
6198    }
6199    Some(out)
6200}
6201
6202/// v7.15.0 — trigram-GIN-accelerated candidate seek for
6203/// `WHERE col LIKE '<pat>'` and `WHERE col ILIKE '<pat>'` when
6204/// the column has a `gin_trgm_ops` GIN index.
6205///
6206/// Walks top-level `AND` so multi-predicate WHEREs (`col LIKE
6207/// 'foo%' AND id > 1`) still hit the trigram index; the caller
6208/// re-evaluates the full WHERE per candidate row, so dropping
6209/// non-LIKE conjuncts here stays semantically correct.
6210///
6211/// Returns `None` when:
6212///   - no leaf is `col LIKE/ILIKE <literal>` on a trigram-GIN-
6213///     indexed column;
6214///   - the pattern's literal runs are too short to constrain
6215///     (pattern decomposes into `< 3`-char runs, e.g. `%ab%`);
6216///   - the pattern doesn't const-evaluate to a TEXT.
6217fn try_trgm_seek<'a>(
6218    where_expr: &Expr,
6219    schema_cols: &[ColumnSchema],
6220    table: &'a Table,
6221    table_alias: &str,
6222) -> Option<Vec<Cow<'a, Row>>> {
6223    if let Expr::Binary {
6224        lhs,
6225        op: BinOp::And,
6226        rhs,
6227    } = where_expr
6228    {
6229        if let Some(rows) = try_trgm_seek(lhs, schema_cols, table, table_alias) {
6230            return Some(rows);
6231        }
6232        return try_trgm_seek(rhs, schema_cols, table, table_alias);
6233    }
6234    // LIKE node is what carries the column reference + pattern.
6235    // ILIKE is the same AST node — PG's LIKE/ILIKE both lower
6236    // through `Expr::Like { expr, pattern, negated }`. The trigram
6237    // index posting-list keys are already lower-cased and
6238    // case-folded, so we only need the pattern's literal runs.
6239    let Expr::Like {
6240        expr, pattern, ..
6241    } = where_expr
6242    else {
6243        return None;
6244    };
6245    // Column side.
6246    let Expr::Column(c) = expr.as_ref() else {
6247        return None;
6248    };
6249    if let Some(q) = &c.qualifier
6250        && q != table_alias
6251    {
6252        return None;
6253    }
6254    let col_pos = schema_cols
6255        .iter()
6256        .position(|s| s.name.eq_ignore_ascii_case(&c.name))?;
6257    // Index must exist on that column AND be a trigram-GIN.
6258    let idx = table
6259        .indices()
6260        .iter()
6261        .find(|i| i.column_position == col_pos && i.is_gin_trgm())?;
6262    // Pattern side must be a literal TEXT — anything else (column
6263    // ref, function call, parameter that hasn't been bound yet)
6264    // falls through to full scan.
6265    let Expr::Literal(spg_sql::ast::Literal::String(pat)) = pattern.as_ref() else {
6266        return None;
6267    };
6268    let trigrams = spg_storage::trgm::trigrams_from_like_pattern(pat)?;
6269    // Intersect every trigram's posting list. Empty intersection
6270    // → empty candidate set (caller short-circuits its row loop).
6271    let mut iter = trigrams.iter();
6272    let first = iter.next()?;
6273    let mut acc: Vec<spg_storage::RowLocator> = {
6274        let mut v = idx.gin_trgm_lookup(first).to_vec();
6275        v.sort_by_key(locator_sort_key);
6276        v.dedup_by_key(|l| locator_sort_key(l));
6277        v
6278    };
6279    for tri in iter {
6280        let mut next: Vec<spg_storage::RowLocator> = idx.gin_trgm_lookup(tri).to_vec();
6281        next.sort_by_key(locator_sort_key);
6282        next.dedup_by_key(|l| locator_sort_key(l));
6283        // Sorted-merge intersection.
6284        let mut merged: Vec<spg_storage::RowLocator> = Vec::with_capacity(acc.len().min(next.len()));
6285        let (mut i, mut j) = (0usize, 0usize);
6286        while i < acc.len() && j < next.len() {
6287            let lk = locator_sort_key(&acc[i]);
6288            let rk = locator_sort_key(&next[j]);
6289            match lk.cmp(&rk) {
6290                core::cmp::Ordering::Less => i += 1,
6291                core::cmp::Ordering::Greater => j += 1,
6292                core::cmp::Ordering::Equal => {
6293                    merged.push(acc[i]);
6294                    i += 1;
6295                    j += 1;
6296                }
6297            }
6298        }
6299        acc = merged;
6300        if acc.is_empty() {
6301            break;
6302        }
6303    }
6304    let mut out: Vec<Cow<'a, Row>> = Vec::with_capacity(acc.len());
6305    for loc in acc {
6306        if let spg_storage::RowLocator::Hot(i) = loc
6307            && let Some(row) = table.rows().get(i)
6308        {
6309            out.push(Cow::Borrowed(row));
6310        }
6311        // Cold-tier rows: skipped in MVP (same as try_gin_seek).
6312    }
6313    Some(out)
6314}
6315
6316/// v7.12.3 — extract `(column_position, TsQueryAst)` when one side of
6317/// the binary is a column reference to a GIN-indexed tsvector column
6318/// and the other side const-evaluates to a `Value::TsQuery`. Returns
6319/// `None` if the column reference is for the wrong table alias, or if
6320/// the RHS expression depends on row data.
6321fn resolve_gin_col_query(
6322    col_side: &Expr,
6323    query_side: &Expr,
6324    schema_cols: &[ColumnSchema],
6325    table_alias: &str,
6326    ctx: &eval::EvalContext<'_>,
6327) -> Option<(usize, spg_storage::TsQueryAst)> {
6328    let Expr::Column(c) = col_side else {
6329        return None;
6330    };
6331    if let Some(q) = &c.qualifier
6332        && q != table_alias
6333    {
6334        return None;
6335    }
6336    let pos = schema_cols.iter().position(|s| s.name == c.name)?;
6337    // Const-evaluate the query side with an empty row — fails fast
6338    // (with a `ColumnNotFound` / similar) if the expression actually
6339    // depends on row data, which is exactly the bail signal we want.
6340    let empty_row = Row::new(Vec::new());
6341    let v = eval::eval_expr(query_side, &empty_row, ctx).ok()?;
6342    let Value::TsQuery(q) = v else { return None };
6343    Some((pos, q))
6344}
6345
6346/// v7.12.3 — walk a `TsQueryAst` against an [`IndexKind::Gin`] index
6347/// to produce a candidate row-locator set. Returns `None` for query
6348/// shapes the MVP doesn't accelerate (`Not` / `Phrase` — both bail to
6349/// full scan since their semantics need either complementation across
6350/// the whole row set or positional verification beyond what the
6351/// posting list carries).
6352///
6353/// Candidate sets are over-approximate — the caller re-applies the
6354/// full `@@` predicate per row, so reporting "row was in some
6355/// posting list" without verifying positions / weights stays correct.
6356fn gin_query_candidates(
6357    idx: &spg_storage::Index,
6358    query: &spg_storage::TsQueryAst,
6359) -> Option<Vec<spg_storage::RowLocator>> {
6360    use spg_storage::TsQueryAst;
6361    match query {
6362        TsQueryAst::Term { word, .. } => {
6363            let mut v: Vec<spg_storage::RowLocator> = idx.gin_lookup_word(word).to_vec();
6364            v.sort_by_key(locator_sort_key);
6365            v.dedup_by_key(|l| locator_sort_key(l));
6366            Some(v)
6367        }
6368        TsQueryAst::And(l, r) => {
6369            let mut left = gin_query_candidates(idx, l)?;
6370            let mut right = gin_query_candidates(idx, r)?;
6371            left.sort_by_key(locator_sort_key);
6372            right.sort_by_key(locator_sort_key);
6373            // Sorted-merge intersection.
6374            let mut out: Vec<spg_storage::RowLocator> = Vec::new();
6375            let (mut i, mut j) = (0usize, 0usize);
6376            while i < left.len() && j < right.len() {
6377                let lk = locator_sort_key(&left[i]);
6378                let rk = locator_sort_key(&right[j]);
6379                match lk.cmp(&rk) {
6380                    core::cmp::Ordering::Less => i += 1,
6381                    core::cmp::Ordering::Greater => j += 1,
6382                    core::cmp::Ordering::Equal => {
6383                        out.push(left[i]);
6384                        i += 1;
6385                        j += 1;
6386                    }
6387                }
6388            }
6389            Some(out)
6390        }
6391        TsQueryAst::Or(l, r) => {
6392            let mut out = gin_query_candidates(idx, l)?;
6393            out.extend(gin_query_candidates(idx, r)?);
6394            out.sort_by_key(locator_sort_key);
6395            out.dedup_by_key(|l| locator_sort_key(l));
6396            Some(out)
6397        }
6398        // Not / Phrase bail to full scan in the MVP. Not needs
6399        // complementation against the whole row set (not represented
6400        // in the posting-list view); Phrase needs positional
6401        // verification beyond what `word → rows` carries.
6402        TsQueryAst::Not(_) | TsQueryAst::Phrase { .. } => None,
6403    }
6404}
6405
6406/// v7.12.3 — total ordering on `RowLocator` for sort/dedup purposes
6407/// inside the GIN intersection / union loops. Hot rows order by their
6408/// row index; Cold rows order after all Hot rows, then by
6409/// `(segment_id, the cold sub-key)`.
6410fn locator_sort_key(l: &spg_storage::RowLocator) -> (u8, u64, u64) {
6411    match *l {
6412        spg_storage::RowLocator::Hot(i) => (0, i as u64, 0),
6413        spg_storage::RowLocator::Cold {
6414            segment_id,
6415            page_offset,
6416        } => (1, u64::from(segment_id), u64::from(page_offset)),
6417    }
6418}
6419
6420/// v5.2.3: extract `(column_position, IndexKey)` when `where_expr`
6421/// is a simple `col = literal` predicate suitable for a `BTree` index
6422/// seek. Used by `exec_update_cancel` / `exec_delete_cancel` to
6423/// decide whether a write touches a cold-tier row (which requires
6424/// promote-on-write / shadow-on-delete) before falling through to
6425/// the hot-tier row walk.
6426///
6427/// Returns `None` for any predicate shape the planner can't push
6428/// down to an index seek — complex WHERE clauses always take the
6429/// hot-only path (cold rows are immutable to non-indexed writes
6430/// until a future scan-fanout sub-version).
6431fn try_pk_predicate(
6432    where_expr: &Expr,
6433    schema_cols: &[ColumnSchema],
6434    table_alias: &str,
6435) -> Option<(usize, IndexKey)> {
6436    let Expr::Binary {
6437        lhs,
6438        op: BinOp::Eq,
6439        rhs,
6440    } = where_expr
6441    else {
6442        return None;
6443    };
6444    let (col_pos, value) = resolve_col_literal_pair(lhs, rhs, schema_cols, table_alias)
6445        .or_else(|| resolve_col_literal_pair(rhs, lhs, schema_cols, table_alias))?;
6446    let key = IndexKey::from_value(&value)?;
6447    Some((col_pos, key))
6448}
6449
6450fn resolve_col_literal_pair(
6451    col_side: &Expr,
6452    lit_side: &Expr,
6453    schema_cols: &[ColumnSchema],
6454    table_alias: &str,
6455) -> Option<(usize, Value)> {
6456    let Expr::Column(c) = col_side else {
6457        return None;
6458    };
6459    if let Some(q) = &c.qualifier
6460        && q != table_alias
6461    {
6462        return None;
6463    }
6464    let pos = schema_cols.iter().position(|s| s.name == c.name)?;
6465    let Expr::Literal(l) = lit_side else {
6466        return None;
6467    };
6468    let v = match l {
6469        Literal::Integer(n) => {
6470            if let Ok(small) = i32::try_from(*n) {
6471                Value::Int(small)
6472            } else {
6473                Value::BigInt(*n)
6474            }
6475        }
6476        Literal::Float(x) => Value::Float(*x),
6477        Literal::String(s) => Value::Text(s.clone()),
6478        Literal::Bool(b) => Value::Bool(*b),
6479        Literal::Null => Value::Null,
6480        // Vector and Interval literals can't be used as B-tree index keys.
6481        // Tell the planner to fall back to full-scan.
6482        Literal::Vector(_) | Literal::Interval { .. } => return None,
6483    };
6484    Some((pos, v))
6485}
6486
6487/// Find the schema entry that a SELECT-list `Expr::Column` refers to.
6488/// Mirrors `resolve_column` in `eval.rs`, but returns a proper
6489/// `EngineError` so the projection-build path keeps `UnknownQualifier`
6490/// vs `ColumnNotFound` distinct.
6491fn resolve_projection_column<'a>(
6492    c: &ColumnName,
6493    schema_cols: &'a [ColumnSchema],
6494    table_alias: &str,
6495) -> Result<&'a ColumnSchema, EngineError> {
6496    if let Some(q) = &c.qualifier {
6497        let composite = alloc::format!("{q}.{name}", name = c.name);
6498        if let Some(s) = schema_cols.iter().find(|s| s.name == composite) {
6499            return Ok(s);
6500        }
6501        // Single-table case: the qualifier may equal the active alias —
6502        // then look for the bare column name.
6503        if q == table_alias
6504            && let Some(s) = schema_cols.iter().find(|s| s.name == c.name)
6505        {
6506            return Ok(s);
6507        }
6508        // For multi-table schemas the qualifier is unknown only if no
6509        // column bears the "<q>." prefix. For single-table, the alias
6510        // mismatch alone is enough.
6511        let prefix = alloc::format!("{q}.");
6512        let qualifier_known =
6513            q == table_alias || schema_cols.iter().any(|s| s.name.starts_with(&prefix));
6514        if !qualifier_known {
6515            return Err(EngineError::Eval(EvalError::UnknownQualifier {
6516                qualifier: q.clone(),
6517            }));
6518        }
6519        return Err(EngineError::Eval(EvalError::ColumnNotFound {
6520            name: c.name.clone(),
6521        }));
6522    }
6523    if let Some(s) = schema_cols.iter().find(|s| s.name == c.name) {
6524        return Ok(s);
6525    }
6526    let suffix = alloc::format!(".{name}", name = c.name);
6527    let mut matches = schema_cols.iter().filter(|s| s.name.ends_with(&suffix));
6528    let first = matches.next();
6529    let extra = matches.next();
6530    match (first, extra) {
6531        (Some(s), None) => Ok(s),
6532        (Some(_), Some(_)) => Err(EngineError::Eval(EvalError::TypeMismatch {
6533            detail: alloc::format!("ambiguous column reference: {}", c.name),
6534        })),
6535        _ => Err(EngineError::Eval(EvalError::ColumnNotFound {
6536            name: c.name.clone(),
6537        })),
6538    }
6539}
6540
6541fn build_projection(
6542    items: &[SelectItem],
6543    schema_cols: &[ColumnSchema],
6544    table_alias: &str,
6545) -> Result<Vec<ProjectedItem>, EngineError> {
6546    let mut out = Vec::new();
6547    for item in items {
6548        match item {
6549            SelectItem::Wildcard => {
6550                for col in schema_cols {
6551                    out.push(ProjectedItem {
6552                        expr: Expr::Column(ColumnName {
6553                            qualifier: None,
6554                            name: col.name.clone(),
6555                        }),
6556                        output_name: col.name.clone(),
6557                        ty: col.ty,
6558                        nullable: col.nullable,
6559                    });
6560                }
6561            }
6562            SelectItem::Expr { expr, alias } => {
6563                // Plain column ref keeps full schema info (real type +
6564                // nullability). Compound expressions evaluate fine but have
6565                // no static type — surface them as nullable TEXT, which is
6566                // what most clients render anyway.
6567                if let Expr::Column(c) = expr {
6568                    let sch = resolve_projection_column(c, schema_cols, table_alias)?;
6569                    let output_name = alias.clone().unwrap_or_else(|| c.name.clone());
6570                    out.push(ProjectedItem {
6571                        expr: expr.clone(),
6572                        output_name,
6573                        ty: sch.ty,
6574                        nullable: sch.nullable,
6575                    });
6576                } else {
6577                    let output_name = alias.clone().unwrap_or_else(|| expr.to_string());
6578                    out.push(ProjectedItem {
6579                        expr: expr.clone(),
6580                        output_name,
6581                        ty: DataType::Text,
6582                        nullable: true,
6583                    });
6584                }
6585            }
6586        }
6587    }
6588    Ok(out)
6589}
6590
6591/// Promote an integer to a NUMERIC value at the requested scale.
6592/// Rejects values that, after scaling, would overflow the column's
6593/// precision budget.
6594fn numeric_from_integer(
6595    n: i128,
6596    precision: u8,
6597    scale: u8,
6598    col_name: &str,
6599) -> Result<Value, EngineError> {
6600    let factor = pow10_i128(scale);
6601    let scaled = n.checked_mul(factor).ok_or_else(|| {
6602        EngineError::Unsupported(alloc::format!(
6603            "integer overflow scaling value for column `{col_name}` to scale {scale}"
6604        ))
6605    })?;
6606    check_precision(scaled, precision, col_name)?;
6607    Ok(Value::Numeric { scaled, scale })
6608}
6609
6610/// Float → NUMERIC. Uses round-half-away-from-zero on `x * 10^scale`,
6611/// then verifies the result fits the column's precision.
6612#[allow(clippy::cast_precision_loss, clippy::cast_possible_truncation)]
6613fn numeric_from_float(
6614    x: f64,
6615    precision: u8,
6616    scale: u8,
6617    col_name: &str,
6618) -> Result<Value, EngineError> {
6619    if !x.is_finite() {
6620        return Err(EngineError::Unsupported(alloc::format!(
6621            "cannot store non-finite float in NUMERIC column `{col_name}`"
6622        )));
6623    }
6624    let mut factor = 1.0_f64;
6625    for _ in 0..scale {
6626        factor *= 10.0;
6627    }
6628    // Round half-away-from-zero by biasing then casting (`as i128`
6629    // truncates toward zero, so the bias + truncation gives the
6630    // desired rounding). `f64::floor` / `ceil` live in std; we don't
6631    // need them — the cast handles the truncation step.
6632    let shifted = x * factor;
6633    let biased = if shifted >= 0.0 {
6634        shifted + 0.5
6635    } else {
6636        shifted - 0.5
6637    };
6638    // Range-check before casting back to i128 — the cast itself is
6639    // saturating in Rust, which would silently truncate huge inputs.
6640    if !(-1e38..=1e38).contains(&biased) {
6641        return Err(EngineError::Unsupported(alloc::format!(
6642            "value {x} overflows NUMERIC range for column `{col_name}`"
6643        )));
6644    }
6645    let scaled = biased as i128;
6646    check_precision(scaled, precision, col_name)?;
6647    Ok(Value::Numeric { scaled, scale })
6648}
6649
6650/// Move a Numeric value from `src_scale` to `dst_scale`. Going up
6651/// multiplies by 10; going down rounds half-away-from-zero.
6652fn numeric_rescale(
6653    scaled: i128,
6654    src_scale: u8,
6655    precision: u8,
6656    dst_scale: u8,
6657    col_name: &str,
6658) -> Result<Value, EngineError> {
6659    let new_scaled = if dst_scale >= src_scale {
6660        let bump = pow10_i128(dst_scale - src_scale);
6661        scaled.checked_mul(bump).ok_or_else(|| {
6662            EngineError::Unsupported(alloc::format!(
6663                "overflow rescaling NUMERIC for column `{col_name}`"
6664            ))
6665        })?
6666    } else {
6667        let drop = pow10_i128(src_scale - dst_scale);
6668        let half = drop / 2;
6669        if scaled >= 0 {
6670            (scaled + half) / drop
6671        } else {
6672            (scaled - half) / drop
6673        }
6674    };
6675    check_precision(new_scaled, precision, col_name)?;
6676    Ok(Value::Numeric {
6677        scaled: new_scaled,
6678        scale: dst_scale,
6679    })
6680}
6681
6682/// Drop the fractional part of a scaled integer, returning the integer
6683/// portion (toward zero). Used for NUMERIC → INT casts.
6684const fn numeric_truncate_to_integer(scaled: i128, scale: u8) -> i128 {
6685    if scale == 0 {
6686        return scaled;
6687    }
6688    let factor = pow10_i128_const(scale);
6689    scaled / factor
6690}
6691
6692/// Verify a scaled NUMERIC value fits the column's declared precision.
6693/// `precision == 0` is the "unconstrained" form (bare `NUMERIC`); we
6694/// skip the check there.
6695fn check_precision(scaled: i128, precision: u8, col_name: &str) -> Result<(), EngineError> {
6696    if precision == 0 {
6697        return Ok(());
6698    }
6699    let limit = pow10_i128(precision);
6700    if scaled.unsigned_abs() >= limit.unsigned_abs() {
6701        return Err(EngineError::Unsupported(alloc::format!(
6702            "NUMERIC value exceeds precision {precision} for column `{col_name}`"
6703        )));
6704    }
6705    Ok(())
6706}
6707
/// `10^p` as an `i128`, usable in `const` contexts.
///
/// The largest power of ten that fits in `i128` is `10^38`. For
/// `p > 38` the result saturates at `i128::MAX` instead of
/// overflowing; a plain multiplication would panic in debug builds
/// and wrap to an unrelated value in release builds.
const fn pow10_i128_const(p: u8) -> i128 {
    let mut acc: i128 = 1;
    let mut i = 0;
    while i < p {
        // Saturating: once the limit is hit, `acc` stays at
        // `i128::MAX` for the remaining iterations.
        acc = acc.saturating_mul(10);
        i += 1;
    }
    acc
}

/// Non-`const` convenience wrapper around [`pow10_i128_const`], with
/// the same saturation at `i128::MAX` for `p > 38`.
fn pow10_i128(p: u8) -> i128 {
    pow10_i128_const(p)
}
6721
6722/// Walk a parsed `Statement`, swapping any `NOW()` /
6723/// `CURRENT_TIMESTAMP()` / `CURRENT_DATE()` function calls for a
6724/// literal cast that wraps the engine's per-statement clock reading.
6725/// When `now_micros` is `None`, calls stay as-is and surface as
6726/// `unknown function` at eval time — keeps the error path explicit.
6727/// v4.10: pre-walk the WHERE / projection / etc. of a SELECT and
6728/// replace every subquery node with a materialised literal. SPG
6729/// only supports uncorrelated subqueries — the inner SELECT does
6730/// not see outer-row columns, so the result is the same for every
6731/// outer row and can be evaluated once.
6732///
6733/// Returns the rewritten statement; the caller passes this to the
6734/// regular row-loop executor which no longer sees Subquery nodes
6735/// in its tree.
6736impl Engine {
6737    /// v4.12 window executor. Implements `ROW_NUMBER` / `RANK` /
6738    /// `DENSE_RANK` and the partition-aware aggregates `SUM` /
6739    /// `AVG` / `COUNT` / `MIN` / `MAX`. The plan is:
6740    /// 1. Apply the WHERE filter.
6741    /// 2. For each unique `WindowFunction` node in the projection,
6742    ///    partition + sort, compute the per-row value.
6743    /// 3. Append the window values as synthetic columns (`__win_N`)
6744    ///    to the row schema.
6745    /// 4. Rewrite the projection to read those columns.
6746    /// 5. Hand off to the regular project / ORDER BY / LIMIT pipe.
6747    #[allow(
6748        clippy::too_many_lines,
6749        clippy::type_complexity,
6750        clippy::needless_range_loop
6751    )] // window-eval is one cohesive pipe; splitting fragments
6752    fn exec_select_with_window(
6753        &self,
6754        stmt: &SelectStatement,
6755        cancel: CancelToken<'_>,
6756    ) -> Result<QueryResult, EngineError> {
6757        let from = stmt.from.as_ref().ok_or_else(|| {
6758            EngineError::Unsupported("window functions require a FROM clause".into())
6759        })?;
6760        // For v4.12 we only support a single-table FROM. Joins +
6761        // windows is queued for v5.x.
6762        if !from.joins.is_empty() {
6763            return Err(EngineError::Unsupported(
6764                "JOIN with window functions not yet supported".into(),
6765            ));
6766        }
6767        let primary = &from.primary;
6768        let table = self.active_catalog().get(&primary.name).ok_or_else(|| {
6769            StorageError::TableNotFound {
6770                name: primary.name.clone(),
6771            }
6772        })?;
6773        let alias = primary.alias.as_deref().unwrap_or(primary.name.as_str());
6774        let schema_cols = &table.schema().columns;
6775        let ctx = self.ev_ctx(schema_cols, Some(alias));
6776
6777        // 1) Filter pass.
6778        let mut filtered: Vec<&Row> = Vec::new();
6779        for (i, row) in table.rows().iter().enumerate() {
6780            if i.is_multiple_of(256) {
6781                cancel.check()?;
6782            }
6783            if let Some(w) = &stmt.where_ {
6784                let cond = eval::eval_expr(w, row, &ctx)?;
6785                if !matches!(cond, Value::Bool(true)) {
6786                    continue;
6787                }
6788            }
6789            filtered.push(row);
6790        }
6791        let n_rows = filtered.len();
6792
6793        // 2) Collect unique window function nodes from projection.
6794        let mut window_nodes: Vec<Expr> = Vec::new();
6795        for item in &stmt.items {
6796            if let SelectItem::Expr { expr, .. } = item {
6797                collect_window_nodes(expr, &mut window_nodes);
6798            }
6799        }
6800
6801        // 3) For each window, compute per-row value.
6802        // Index: same order as window_nodes; for row i, win_vals[w][i].
6803        let mut win_vals: Vec<Vec<Value>> = Vec::with_capacity(window_nodes.len());
6804        for wnode in &window_nodes {
6805            let Expr::WindowFunction {
6806                name,
6807                args,
6808                partition_by,
6809                order_by,
6810                frame,
6811                null_treatment,
6812            } = wnode
6813            else {
6814                unreachable!("collect_window_nodes pushes only WindowFunction");
6815            };
6816            // Compute (partition_key, order_key, original_index) for each row.
6817            let mut indexed: Vec<(Vec<Value>, Vec<(Value, bool)>, usize)> =
6818                Vec::with_capacity(n_rows);
6819            for (i, row) in filtered.iter().enumerate() {
6820                let pkey: Vec<Value> = partition_by
6821                    .iter()
6822                    .map(|p| eval::eval_expr(p, row, &ctx))
6823                    .collect::<Result<_, _>>()?;
6824                let okey: Vec<(Value, bool)> = order_by
6825                    .iter()
6826                    .map(|(e, desc)| eval::eval_expr(e, row, &ctx).map(|v| (v, *desc)))
6827                    .collect::<Result<_, _>>()?;
6828                indexed.push((pkey, okey, i));
6829            }
6830            // Sort by (partition_key, order_key). Partition key uses
6831            // a stable encoded form; order key respects ASC/DESC.
6832            indexed.sort_by(|a, b| {
6833                let p_cmp = partition_key_cmp(&a.0, &b.0);
6834                if p_cmp != core::cmp::Ordering::Equal {
6835                    return p_cmp;
6836                }
6837                order_key_cmp(&a.1, &b.1)
6838            });
6839            // Per-partition compute.
6840            let mut out_vals: Vec<Value> = alloc::vec![Value::Null; n_rows];
6841            let mut p_start = 0;
6842            while p_start < indexed.len() {
6843                let mut p_end = p_start + 1;
6844                while p_end < indexed.len()
6845                    && partition_key_cmp(&indexed[p_start].0, &indexed[p_end].0)
6846                        == core::cmp::Ordering::Equal
6847                {
6848                    p_end += 1;
6849                }
6850                // Compute the function within this partition slice.
6851                compute_window_partition(
6852                    name,
6853                    args,
6854                    !order_by.is_empty(),
6855                    frame.as_ref(),
6856                    *null_treatment,
6857                    &indexed[p_start..p_end],
6858                    &filtered,
6859                    &ctx,
6860                    &mut out_vals,
6861                )?;
6862                p_start = p_end;
6863            }
6864            win_vals.push(out_vals);
6865        }
6866
6867        // 4) Build extended schema: original columns + synthetic.
6868        let mut ext_cols = schema_cols.clone();
6869        for i in 0..window_nodes.len() {
6870            ext_cols.push(ColumnSchema::new(
6871                alloc::format!("__win_{i}"),
6872                DataType::Text, // type doesn't matter for projection eval
6873                true,
6874            ));
6875        }
6876        // 5) Build extended rows: each row gets its window values appended.
6877        let mut ext_rows: Vec<Row> = Vec::with_capacity(n_rows);
6878        for i in 0..n_rows {
6879            let mut values = filtered[i].values.clone();
6880            for w in 0..window_nodes.len() {
6881                values.push(win_vals[w][i].clone());
6882            }
6883            ext_rows.push(Row::new(values));
6884        }
6885        // 6) Rewrite the projection: WindowFunction nodes → Column(__win_N).
6886        let mut rewritten_items: Vec<SelectItem> = Vec::with_capacity(stmt.items.len());
6887        for item in &stmt.items {
6888            let new_item = match item {
6889                SelectItem::Wildcard => SelectItem::Wildcard,
6890                SelectItem::Expr { expr, alias } => {
6891                    let mut e = expr.clone();
6892                    rewrite_window_to_columns(&mut e, &window_nodes);
6893                    SelectItem::Expr {
6894                        expr: e,
6895                        alias: alias.clone(),
6896                    }
6897                }
6898            };
6899            rewritten_items.push(new_item);
6900        }
6901
6902        // 7) Project into final rows.
6903        let ext_ctx = EvalContext::new(&ext_cols, Some(alias));
6904        let projection = build_projection(&rewritten_items, &ext_cols, alias)?;
6905        let mut tagged: Vec<(Vec<f64>, Row)> = Vec::with_capacity(n_rows);
6906        for (i, row) in ext_rows.iter().enumerate() {
6907            if i.is_multiple_of(256) {
6908                cancel.check()?;
6909            }
6910            let mut values = Vec::with_capacity(projection.len());
6911            for p in &projection {
6912                values.push(eval::eval_expr(&p.expr, row, &ext_ctx)?);
6913            }
6914            let order_keys = if stmt.order_by.is_empty() {
6915                Vec::new()
6916            } else {
6917                let mut keys = Vec::with_capacity(stmt.order_by.len());
6918                for o in &stmt.order_by {
6919                    let mut e = o.expr.clone();
6920                    rewrite_window_to_columns(&mut e, &window_nodes);
6921                    let key = eval::eval_expr(&e, row, &ext_ctx)?;
6922                    keys.push(value_to_order_key(&key)?);
6923                }
6924                keys
6925            };
6926            tagged.push((order_keys, Row::new(values)));
6927        }
6928        // ORDER BY + LIMIT/OFFSET on the projected rows.
6929        if !stmt.order_by.is_empty() {
6930            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
6931            sort_by_keys(&mut tagged, &descs);
6932        }
6933        let mut out_rows: Vec<Row> = tagged.into_iter().map(|(_, r)| r).collect();
6934        apply_offset_and_limit(&mut out_rows, stmt.offset_literal(), stmt.limit_literal());
6935        let final_cols: Vec<ColumnSchema> = projection
6936            .into_iter()
6937            .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
6938            .collect();
6939        Ok(QueryResult::Rows {
6940            columns: final_cols,
6941            rows: out_rows,
6942        })
6943    }
6944
6945    /// v4.11: materialise each CTE into a temp table inside a
6946    /// cloned catalog, then run the body SELECT against a fresh
6947    /// engine instance that owns the enriched catalog. The clone
6948    /// is moderately expensive — only paid by CTE-bearing queries.
6949    /// Subqueries inside CTE bodies / the main body resolve as
6950    /// usual; `clock_fn` is propagated so `NOW()` lines up.
6951    fn exec_with_ctes(
6952        &self,
6953        stmt: &SelectStatement,
6954        cancel: CancelToken<'_>,
6955    ) -> Result<QueryResult, EngineError> {
6956        cancel.check()?;
6957        let mut catalog = self.active_catalog().clone();
6958        for cte in &stmt.ctes {
6959            if catalog.get(&cte.name).is_some() {
6960                return Err(EngineError::Unsupported(alloc::format!(
6961                    "CTE name {:?} shadows an existing table; rename the CTE",
6962                    cte.name
6963                )));
6964            }
6965            let (columns, rows) = if cte.recursive {
6966                self.materialise_recursive_cte(cte, &catalog, cancel)?
6967            } else {
6968                let body_result = self.exec_select_cancel(&cte.body, cancel)?;
6969                let QueryResult::Rows { columns, rows } = body_result else {
6970                    return Err(EngineError::Unsupported(alloc::format!(
6971                        "CTE {:?} body did not return rows",
6972                        cte.name
6973                    )));
6974                };
6975                (columns, rows)
6976            };
6977            // v4.22: the projection builder labels any non-column
6978            // expression as Text — including literal SELECT 1.
6979            // Promote each column's type to whatever the rows
6980            // actually carry so the CTE storage table accepts them.
6981            let inferred = infer_column_types(&columns, &rows);
6982            let mut columns = inferred;
6983            // v4.22: apply optional `WITH name(a, b, c)` overrides.
6984            if !cte.column_overrides.is_empty() {
6985                if cte.column_overrides.len() != columns.len() {
6986                    return Err(EngineError::Unsupported(alloc::format!(
6987                        "CTE {:?} column list has {} names but body returns {} columns",
6988                        cte.name,
6989                        cte.column_overrides.len(),
6990                        columns.len()
6991                    )));
6992                }
6993                for (col, name) in columns.iter_mut().zip(cte.column_overrides.iter()) {
6994                    col.name.clone_from(name);
6995                }
6996            }
6997            let schema = TableSchema::new(cte.name.clone(), columns);
6998            catalog.create_table(schema).map_err(EngineError::Storage)?;
6999            let table = catalog
7000                .get_mut(&cte.name)
7001                .expect("just-created CTE table must exist");
7002            for row in rows {
7003                table.insert(row).map_err(EngineError::Storage)?;
7004            }
7005        }
7006        // Strip CTEs from the body before running on the temp engine
7007        // so we don't recurse forever.
7008        let mut body = stmt.clone();
7009        body.ctes = Vec::new();
7010        let mut temp = Engine::restore(catalog);
7011        if let Some(c) = self.clock {
7012            temp = temp.with_clock(c);
7013        }
7014        if let Some(f) = self.salt_fn {
7015            temp = temp.with_salt_fn(f);
7016        }
7017        temp.exec_select_cancel(&body, cancel)
7018    }
7019
7020    /// v4.22: materialise a WITH RECURSIVE CTE. The body must be a
7021    /// UNION (or UNION ALL) of an anchor that does not reference
7022    /// the CTE name, and one or more recursive terms that do. The
7023    /// anchor runs first; each subsequent iteration runs the
7024    /// recursive term against a temp catalog where the CTE name is
7025    /// bound to the *previous* iteration's output. Iteration stops
7026    /// when the recursive term yields no rows; UNION (DISTINCT)
7027    /// deduplicates against the accumulated result, UNION ALL does
7028    /// not. A hard cap on total rows prevents runaway queries.
7029    #[allow(clippy::too_many_lines)]
7030    fn materialise_recursive_cte(
7031        &self,
7032        cte: &spg_sql::ast::Cte,
7033        base_catalog: &Catalog,
7034        cancel: CancelToken<'_>,
7035    ) -> Result<(Vec<ColumnSchema>, Vec<Row>), EngineError> {
7036        const MAX_TOTAL_ROWS: usize = 1_000_000;
7037        const MAX_ITERATIONS: usize = 100_000;
7038        cancel.check()?;
7039        if cte.body.unions.is_empty() {
7040            return Err(EngineError::Unsupported(alloc::format!(
7041                "WITH RECURSIVE {:?} body must be a UNION of an anchor and a recursive term",
7042                cte.name
7043            )));
7044        }
7045        // Anchor: the body's leading SELECT, with unions stripped.
7046        let mut anchor = cte.body.clone();
7047        let union_terms = core::mem::take(&mut anchor.unions);
7048        anchor.ctes = Vec::new();
7049        // Anchor must not reference the CTE name.
7050        if select_refers_to(&anchor, &cte.name) {
7051            return Err(EngineError::Unsupported(alloc::format!(
7052                "WITH RECURSIVE {:?}: the anchor must not reference the CTE itself",
7053                cte.name
7054            )));
7055        }
7056        let anchor_result = self.exec_select_cancel(&anchor, cancel)?;
7057        let QueryResult::Rows {
7058            columns: anchor_cols,
7059            rows: anchor_rows,
7060        } = anchor_result
7061        else {
7062            return Err(EngineError::Unsupported(alloc::format!(
7063                "WITH RECURSIVE {:?}: anchor did not return rows",
7064                cte.name
7065            )));
7066        };
7067        // The projection builder labels non-column expressions Text;
7068        // refine column types from the anchor's actual values so the
7069        // intermediate iter-catalog tables accept them.
7070        let mut columns = infer_column_types(&anchor_cols, &anchor_rows);
7071        if !cte.column_overrides.is_empty() {
7072            if cte.column_overrides.len() != columns.len() {
7073                return Err(EngineError::Unsupported(alloc::format!(
7074                    "CTE {:?} column list has {} names but anchor returns {} columns",
7075                    cte.name,
7076                    cte.column_overrides.len(),
7077                    columns.len()
7078                )));
7079            }
7080            for (col, name) in columns.iter_mut().zip(cte.column_overrides.iter()) {
7081                col.name.clone_from(name);
7082            }
7083        }
7084        let mut all_rows: Vec<Row> = anchor_rows.clone();
7085        let mut working_set: Vec<Row> = anchor_rows;
7086        let mut seen: alloc::collections::BTreeSet<Vec<u8>> = alloc::collections::BTreeSet::new();
7087        // Track at least one "all UNION ALL" flag — if every union
7088        // kind is ALL we skip the dedup step (faster + matches PG).
7089        let all_union_all = union_terms.iter().all(|(k, _)| matches!(k, UnionKind::All));
7090        if !all_union_all {
7091            for r in &all_rows {
7092                seen.insert(encode_row_key(r));
7093            }
7094        }
7095        for iter in 0..MAX_ITERATIONS {
7096            cancel.check()?;
7097            if working_set.is_empty() {
7098                break;
7099            }
7100            // Build a fresh catalog: base + CTE bound to working_set.
7101            let mut iter_catalog = base_catalog.clone();
7102            let schema = TableSchema::new(cte.name.clone(), columns.clone());
7103            iter_catalog
7104                .create_table(schema)
7105                .map_err(EngineError::Storage)?;
7106            {
7107                let table = iter_catalog.get_mut(&cte.name).expect("just-created");
7108                for row in &working_set {
7109                    table.insert(row.clone()).map_err(EngineError::Storage)?;
7110                }
7111            }
7112            let mut iter_engine = Engine::restore(iter_catalog);
7113            if let Some(c) = self.clock {
7114                iter_engine = iter_engine.with_clock(c);
7115            }
7116            if let Some(f) = self.salt_fn {
7117                iter_engine = iter_engine.with_salt_fn(f);
7118            }
7119            // Run each recursive term in sequence and collect new rows.
7120            let mut next_set: Vec<Row> = Vec::new();
7121            for (_, term) in &union_terms {
7122                let mut term = term.clone();
7123                term.ctes = Vec::new();
7124                let r = iter_engine.exec_select_cancel(&term, cancel)?;
7125                let QueryResult::Rows {
7126                    columns: rc,
7127                    rows: rs,
7128                } = r
7129                else {
7130                    return Err(EngineError::Unsupported(alloc::format!(
7131                        "WITH RECURSIVE {:?}: recursive term did not return rows",
7132                        cte.name
7133                    )));
7134                };
7135                if rc.len() != columns.len() {
7136                    return Err(EngineError::Unsupported(alloc::format!(
7137                        "WITH RECURSIVE {:?}: column count of recursive term ({}) does not match anchor ({})",
7138                        cte.name,
7139                        rc.len(),
7140                        columns.len()
7141                    )));
7142                }
7143                for row in rs {
7144                    if !all_union_all {
7145                        let key = encode_row_key(&row);
7146                        if !seen.insert(key) {
7147                            continue;
7148                        }
7149                    }
7150                    next_set.push(row);
7151                }
7152            }
7153            if next_set.is_empty() {
7154                break;
7155            }
7156            all_rows.extend(next_set.iter().cloned());
7157            working_set = next_set;
7158            if all_rows.len() > MAX_TOTAL_ROWS {
7159                return Err(EngineError::Unsupported(alloc::format!(
7160                    "WITH RECURSIVE {:?}: produced more than {MAX_TOTAL_ROWS} rows — likely runaway recursion",
7161                    cte.name
7162                )));
7163            }
7164            if iter + 1 == MAX_ITERATIONS {
7165                return Err(EngineError::Unsupported(alloc::format!(
7166                    "WITH RECURSIVE {:?}: exceeded {MAX_ITERATIONS} iterations",
7167                    cte.name
7168                )));
7169            }
7170        }
7171        Ok((columns, all_rows))
7172    }
7173
7174    fn resolve_select_subqueries(
7175        &self,
7176        stmt: &mut SelectStatement,
7177        cancel: CancelToken<'_>,
7178    ) -> Result<(), EngineError> {
7179        for item in &mut stmt.items {
7180            if let SelectItem::Expr { expr, .. } = item {
7181                self.resolve_expr_subqueries(expr, cancel)?;
7182            }
7183        }
7184        if let Some(w) = &mut stmt.where_ {
7185            self.resolve_expr_subqueries(w, cancel)?;
7186        }
7187        if let Some(gs) = &mut stmt.group_by {
7188            for g in gs {
7189                self.resolve_expr_subqueries(g, cancel)?;
7190            }
7191        }
7192        if let Some(h) = &mut stmt.having {
7193            self.resolve_expr_subqueries(h, cancel)?;
7194        }
7195        for o in &mut stmt.order_by {
7196            self.resolve_expr_subqueries(&mut o.expr, cancel)?;
7197        }
7198        for (_, peer) in &mut stmt.unions {
7199            self.resolve_select_subqueries(peer, cancel)?;
7200        }
7201        Ok(())
7202    }
7203
7204    #[allow(clippy::only_used_in_recursion)] // engine handle reads aren't really pure
7205    fn resolve_expr_subqueries(
7206        &self,
7207        e: &mut Expr,
7208        cancel: CancelToken<'_>,
7209    ) -> Result<(), EngineError> {
7210        // Replace-on-this-node cases first.
7211        if let Some(replacement) = self.subquery_replacement(e, cancel)? {
7212            *e = replacement;
7213            return Ok(());
7214        }
7215        match e {
7216            Expr::Binary { lhs, rhs, .. } => {
7217                self.resolve_expr_subqueries(lhs, cancel)?;
7218                self.resolve_expr_subqueries(rhs, cancel)?;
7219            }
7220            Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
7221                self.resolve_expr_subqueries(expr, cancel)?;
7222            }
7223            Expr::FunctionCall { args, .. } => {
7224                for a in args {
7225                    self.resolve_expr_subqueries(a, cancel)?;
7226                }
7227            }
7228            Expr::Like { expr, pattern, .. } => {
7229                self.resolve_expr_subqueries(expr, cancel)?;
7230                self.resolve_expr_subqueries(pattern, cancel)?;
7231            }
7232            Expr::Extract { source, .. } => self.resolve_expr_subqueries(source, cancel)?,
7233            // v4.12 window functions — recurse into args + ORDER BY
7234            // + PARTITION BY in case they carry inner subqueries.
7235            Expr::WindowFunction {
7236                args,
7237                partition_by,
7238                order_by,
7239                ..
7240            } => {
7241                for a in args {
7242                    self.resolve_expr_subqueries(a, cancel)?;
7243                }
7244                for p in partition_by {
7245                    self.resolve_expr_subqueries(p, cancel)?;
7246                }
7247                for (e, _) in order_by {
7248                    self.resolve_expr_subqueries(e, cancel)?;
7249                }
7250            }
7251            // Subquery nodes are handled in subquery_replacement
7252            // (which returned None — defensive no-op); Literal /
7253            // Column are leaves.
7254            Expr::ScalarSubquery(_)
7255            | Expr::Exists { .. }
7256            | Expr::InSubquery { .. }
7257            | Expr::Literal(_)
7258            | Expr::Placeholder(_)
7259            | Expr::Column(_) => {}
7260            // v7.10.10 — recurse children.
7261            Expr::Array(items) => {
7262                for elem in items {
7263                    self.resolve_expr_subqueries(elem, cancel)?;
7264                }
7265            }
7266            Expr::ArraySubscript { target, index } => {
7267                self.resolve_expr_subqueries(target, cancel)?;
7268                self.resolve_expr_subqueries(index, cancel)?;
7269            }
7270            Expr::AnyAll { expr, array, .. } => {
7271                self.resolve_expr_subqueries(expr, cancel)?;
7272                self.resolve_expr_subqueries(array, cancel)?;
7273            }
7274            Expr::Case {
7275                operand,
7276                branches,
7277                else_branch,
7278            } => {
7279                if let Some(o) = operand {
7280                    self.resolve_expr_subqueries(o, cancel)?;
7281                }
7282                for (w, t) in branches {
7283                    self.resolve_expr_subqueries(w, cancel)?;
7284                    self.resolve_expr_subqueries(t, cancel)?;
7285                }
7286                if let Some(e) = else_branch {
7287                    self.resolve_expr_subqueries(e, cancel)?;
7288                }
7289            }
7290        }
7291        Ok(())
7292    }
7293
7294    /// v4.23: per-row eval that handles correlated subqueries.
7295    /// Equivalent to `eval::eval_expr` when the expression has no
7296    /// subqueries; otherwise clones the expression, substitutes
7297    /// outer-row columns into each surviving subquery node, runs
7298    /// the inner SELECT, and replaces the node with the literal
7299    /// result. Only the WHERE-filter call sites use this path so
7300    /// the uncorrelated fast path is preserved everywhere else.
7301    fn eval_expr_with_correlated(
7302        &self,
7303        expr: &Expr,
7304        row: &Row,
7305        ctx: &EvalContext<'_>,
7306        cancel: CancelToken<'_>,
7307        memo: Option<&mut memoize::MemoizeCache>,
7308    ) -> Result<Value, EngineError> {
7309        if !expr_has_subquery(expr) {
7310            return eval::eval_expr(expr, row, ctx).map_err(EngineError::Eval);
7311        }
7312        let mut e = expr.clone();
7313        self.resolve_correlated_in_expr(&mut e, row, ctx, cancel, memo)?;
7314        eval::eval_expr(&e, row, ctx).map_err(EngineError::Eval)
7315    }
7316
7317    fn resolve_correlated_in_expr(
7318        &self,
7319        e: &mut Expr,
7320        row: &Row,
7321        ctx: &EvalContext<'_>,
7322        cancel: CancelToken<'_>,
7323        mut memo: Option<&mut memoize::MemoizeCache>,
7324    ) -> Result<(), EngineError> {
7325        match e {
7326            Expr::ScalarSubquery(inner) => {
7327                // v6.2.6 — Memoize: build the cache key from the
7328                // pre-substitution subquery repr + the outer row's
7329                // values. Two outer rows with identical correlated
7330                // values hit the same entry.
7331                let cache_key = memo.as_ref().map(|_| memoize::CacheKey {
7332                    subquery_repr: alloc::format!("{}", **inner),
7333                    outer_values: row.values.clone(),
7334                });
7335                if let (Some(cache), Some(k)) = (memo.as_deref_mut(), cache_key.as_ref())
7336                    && let Some(cached) = cache.get(k)
7337                {
7338                    *e = value_to_literal_expr(cached)?;
7339                    return Ok(());
7340                }
7341                let mut s = (**inner).clone();
7342                substitute_outer_columns(&mut s, row, ctx);
7343                let r = self.exec_select_cancel(&s, cancel)?;
7344                let QueryResult::Rows { rows, .. } = r else {
7345                    return Err(EngineError::Unsupported(
7346                        "scalar subquery: inner did not return rows".into(),
7347                    ));
7348                };
7349                let value = match rows.as_slice() {
7350                    [] => Value::Null,
7351                    [r0] => r0.values.first().cloned().unwrap_or(Value::Null),
7352                    _ => {
7353                        return Err(EngineError::Unsupported(alloc::format!(
7354                            "scalar subquery returned {} rows; expected 0 or 1",
7355                            rows.len()
7356                        )));
7357                    }
7358                };
7359                if let (Some(cache), Some(k)) = (memo.as_deref_mut(), cache_key) {
7360                    cache.insert(k, value.clone());
7361                }
7362                *e = value_to_literal_expr(value)?;
7363            }
7364            Expr::Exists { subquery, negated } => {
7365                let mut s = (**subquery).clone();
7366                substitute_outer_columns(&mut s, row, ctx);
7367                let r = self.exec_select_cancel(&s, cancel)?;
7368                let exists = matches!(r, QueryResult::Rows { rows, .. } if !rows.is_empty());
7369                let bit = if *negated { !exists } else { exists };
7370                *e = Expr::Literal(Literal::Bool(bit));
7371            }
7372            Expr::InSubquery {
7373                expr: lhs,
7374                subquery,
7375                negated,
7376            } => {
7377                self.resolve_correlated_in_expr(lhs, row, ctx, cancel, memo.as_deref_mut())?;
7378                let lhs_val = eval::eval_expr(lhs, row, ctx).map_err(EngineError::Eval)?;
7379                let mut s = (**subquery).clone();
7380                substitute_outer_columns(&mut s, row, ctx);
7381                let r = self.exec_select_cancel(&s, cancel)?;
7382                let QueryResult::Rows { columns, rows, .. } = r else {
7383                    return Err(EngineError::Unsupported(
7384                        "IN-subquery: inner did not return rows".into(),
7385                    ));
7386                };
7387                if columns.len() != 1 {
7388                    return Err(EngineError::Unsupported(alloc::format!(
7389                        "IN-subquery must project exactly one column; got {}",
7390                        columns.len()
7391                    )));
7392                }
7393                let mut found = false;
7394                let mut any_null = false;
7395                for r0 in rows {
7396                    let v = r0.values.into_iter().next().unwrap_or(Value::Null);
7397                    if v.is_null() {
7398                        any_null = true;
7399                        continue;
7400                    }
7401                    if value_cmp(&v, &lhs_val) == core::cmp::Ordering::Equal {
7402                        found = true;
7403                        break;
7404                    }
7405                }
7406                let bit = if found {
7407                    !*negated
7408                } else if any_null {
7409                    return Err(EngineError::Unsupported(
7410                        "IN-subquery with NULL in result and no match: NULL semantics not yet implemented".into(),
7411                    ));
7412                } else {
7413                    *negated
7414                };
7415                *e = Expr::Literal(Literal::Bool(bit));
7416            }
7417            Expr::Binary { lhs, rhs, .. } => {
7418                self.resolve_correlated_in_expr(lhs, row, ctx, cancel, memo.as_deref_mut())?;
7419                self.resolve_correlated_in_expr(rhs, row, ctx, cancel, memo.as_deref_mut())?;
7420            }
7421            Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
7422                self.resolve_correlated_in_expr(expr, row, ctx, cancel, memo.as_deref_mut())?;
7423            }
7424            Expr::Like { expr, pattern, .. } => {
7425                self.resolve_correlated_in_expr(expr, row, ctx, cancel, memo.as_deref_mut())?;
7426                self.resolve_correlated_in_expr(pattern, row, ctx, cancel, memo.as_deref_mut())?;
7427            }
7428            Expr::FunctionCall { args, .. } => {
7429                for a in args {
7430                    self.resolve_correlated_in_expr(a, row, ctx, cancel, memo.as_deref_mut())?;
7431                }
7432            }
7433            Expr::Extract { source, .. } => {
7434                self.resolve_correlated_in_expr(source, row, ctx, cancel, memo.as_deref_mut())?;
7435            }
7436            Expr::WindowFunction { .. }
7437            | Expr::Literal(_)
7438            | Expr::Placeholder(_)
7439            | Expr::Column(_) => {}
7440            // v7.10.10 — recurse children.
7441            Expr::Array(items) => {
7442                for elem in items {
7443                    self.resolve_correlated_in_expr(elem, row, ctx, cancel, memo.as_deref_mut())?;
7444                }
7445            }
7446            Expr::ArraySubscript { target, index } => {
7447                self.resolve_correlated_in_expr(target, row, ctx, cancel, memo.as_deref_mut())?;
7448                self.resolve_correlated_in_expr(index, row, ctx, cancel, memo.as_deref_mut())?;
7449            }
7450            Expr::AnyAll { expr, array, .. } => {
7451                self.resolve_correlated_in_expr(expr, row, ctx, cancel, memo.as_deref_mut())?;
7452                self.resolve_correlated_in_expr(array, row, ctx, cancel, memo.as_deref_mut())?;
7453            }
7454            Expr::Case {
7455                operand,
7456                branches,
7457                else_branch,
7458            } => {
7459                if let Some(o) = operand {
7460                    self.resolve_correlated_in_expr(o, row, ctx, cancel, memo.as_deref_mut())?;
7461                }
7462                for (w, t) in branches {
7463                    self.resolve_correlated_in_expr(w, row, ctx, cancel, memo.as_deref_mut())?;
7464                    self.resolve_correlated_in_expr(t, row, ctx, cancel, memo.as_deref_mut())?;
7465                }
7466                if let Some(e) = else_branch {
7467                    self.resolve_correlated_in_expr(e, row, ctx, cancel, memo.as_deref_mut())?;
7468                }
7469            }
7470        }
7471        Ok(())
7472    }
7473
7474    fn subquery_replacement(
7475        &self,
7476        e: &Expr,
7477        cancel: CancelToken<'_>,
7478    ) -> Result<Option<Expr>, EngineError> {
7479        match e {
7480            Expr::ScalarSubquery(inner) => {
7481                let mut s = (**inner).clone();
7482                // Recurse into the inner SELECT first so nested
7483                // subqueries materialise bottom-up.
7484                self.resolve_select_subqueries(&mut s, cancel)?;
7485                let r = match self.exec_bare_select_cancel(&s, cancel) {
7486                    Ok(r) => r,
7487                    Err(e) if is_correlation_error(&e) => return Ok(None),
7488                    Err(e) => return Err(e),
7489                };
7490                let QueryResult::Rows { rows, .. } = r else {
7491                    return Err(EngineError::Unsupported(
7492                        "scalar subquery: inner statement did not return rows".into(),
7493                    ));
7494                };
7495                let value = match rows.as_slice() {
7496                    [] => Value::Null,
7497                    [row] => row.values.first().cloned().unwrap_or(Value::Null),
7498                    _ => {
7499                        return Err(EngineError::Unsupported(alloc::format!(
7500                            "scalar subquery returned {} rows; expected 0 or 1",
7501                            rows.len()
7502                        )));
7503                    }
7504                };
7505                Ok(Some(value_to_literal_expr(value)?))
7506            }
7507            Expr::Exists { subquery, negated } => {
7508                let mut s = (**subquery).clone();
7509                self.resolve_select_subqueries(&mut s, cancel)?;
7510                let r = match self.exec_bare_select_cancel(&s, cancel) {
7511                    Ok(r) => r,
7512                    Err(e) if is_correlation_error(&e) => return Ok(None),
7513                    Err(e) => return Err(e),
7514                };
7515                let exists = match r {
7516                    QueryResult::Rows { rows, .. } => !rows.is_empty(),
7517                    QueryResult::CommandOk { .. } => false,
7518                };
7519                let bit = if *negated { !exists } else { exists };
7520                Ok(Some(Expr::Literal(Literal::Bool(bit))))
7521            }
7522            Expr::InSubquery {
7523                expr,
7524                subquery,
7525                negated,
7526            } => {
7527                let mut s = (**subquery).clone();
7528                self.resolve_select_subqueries(&mut s, cancel)?;
7529                let r = match self.exec_bare_select_cancel(&s, cancel) {
7530                    Ok(r) => r,
7531                    Err(e) if is_correlation_error(&e) => return Ok(None),
7532                    Err(e) => return Err(e),
7533                };
7534                let QueryResult::Rows { columns, rows, .. } = r else {
7535                    return Err(EngineError::Unsupported(
7536                        "IN-subquery: inner statement did not return rows".into(),
7537                    ));
7538                };
7539                if columns.len() != 1 {
7540                    return Err(EngineError::Unsupported(alloc::format!(
7541                        "IN-subquery must project exactly one column; got {}",
7542                        columns.len()
7543                    )));
7544                }
7545                // Build the same OR-Eq chain the parse-time literal-list
7546                // path constructs, with each value lifted into a Literal.
7547                let mut acc: Option<Expr> = None;
7548                for row in rows {
7549                    let v = row.values.into_iter().next().unwrap_or(Value::Null);
7550                    let lit = value_to_literal_expr(v)?;
7551                    let cmp = Expr::Binary {
7552                        lhs: expr.clone(),
7553                        op: BinOp::Eq,
7554                        rhs: Box::new(lit),
7555                    };
7556                    acc = Some(match acc {
7557                        None => cmp,
7558                        Some(prev) => Expr::Binary {
7559                            lhs: Box::new(prev),
7560                            op: BinOp::Or,
7561                            rhs: Box::new(cmp),
7562                        },
7563                    });
7564                }
7565                let combined = acc.unwrap_or(Expr::Literal(Literal::Bool(false)));
7566                let final_expr = if *negated {
7567                    Expr::Unary {
7568                        op: UnOp::Not,
7569                        expr: Box::new(combined),
7570                    }
7571                } else {
7572                    combined
7573                };
7574                Ok(Some(final_expr))
7575            }
7576            _ => Ok(None),
7577        }
7578    }
7579}
7580
7581// ---- v4.12 window-function helpers ----
7582// The (partition-key, order-key, original-index) tuple shape used
7583// across these helpers is intrinsic to the planner. Factoring it
7584// into a typedef adds indirection without making the code clearer,
7585// so several lints are allowed inline on the affected functions
7586// rather than module-wide.
7587
7588/// v4.22: cheap structural scan for `FROM <name>` (qualified or
7589/// not) inside a SELECT — used to verify the anchor of a WITH
7590/// RECURSIVE CTE doesn't recurse into itself. Conservative: walks
7591/// FROM joins, subqueries, and unions.
7592fn select_refers_to(stmt: &SelectStatement, target: &str) -> bool {
7593    if let Some(from) = &stmt.from
7594        && from_refers_to(from, target)
7595    {
7596        return true;
7597    }
7598    for (_, peer) in &stmt.unions {
7599        if select_refers_to(peer, target) {
7600            return true;
7601        }
7602    }
7603    for item in &stmt.items {
7604        if let SelectItem::Expr { expr, .. } = item
7605            && expr_refers_to(expr, target)
7606        {
7607            return true;
7608        }
7609    }
7610    if let Some(w) = &stmt.where_
7611        && expr_refers_to(w, target)
7612    {
7613        return true;
7614    }
7615    false
7616}
7617
7618fn from_refers_to(from: &FromClause, target: &str) -> bool {
7619    if from.primary.name.eq_ignore_ascii_case(target) {
7620        return true;
7621    }
7622    from.joins
7623        .iter()
7624        .any(|j| j.table.name.eq_ignore_ascii_case(target))
7625}
7626
7627fn expr_refers_to(e: &Expr, target: &str) -> bool {
7628    match e {
7629        Expr::ScalarSubquery(s) => select_refers_to(s, target),
7630        Expr::Exists { subquery, .. } | Expr::InSubquery { subquery, .. } => {
7631            select_refers_to(subquery, target)
7632        }
7633        Expr::Binary { lhs, rhs, .. } => expr_refers_to(lhs, target) || expr_refers_to(rhs, target),
7634        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
7635            expr_refers_to(expr, target)
7636        }
7637        Expr::Like { expr, pattern, .. } => {
7638            expr_refers_to(expr, target) || expr_refers_to(pattern, target)
7639        }
7640        Expr::FunctionCall { args, .. } => args.iter().any(|a| expr_refers_to(a, target)),
7641        Expr::Extract { source, .. } => expr_refers_to(source, target),
7642        Expr::WindowFunction {
7643            args,
7644            partition_by,
7645            order_by,
7646            ..
7647        } => {
7648            args.iter().any(|a| expr_refers_to(a, target))
7649                || partition_by.iter().any(|p| expr_refers_to(p, target))
7650                || order_by.iter().any(|(o, _)| expr_refers_to(o, target))
7651        }
7652        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => false,
7653        Expr::Array(items) => items.iter().any(|e| expr_refers_to(e, target)),
7654        Expr::ArraySubscript { target: t, index } => {
7655            expr_refers_to(t, target) || expr_refers_to(index, target)
7656        }
7657        Expr::AnyAll { expr, array, .. } => {
7658            expr_refers_to(expr, target) || expr_refers_to(array, target)
7659        }
7660        Expr::Case {
7661            operand,
7662            branches,
7663            else_branch,
7664        } => {
7665            operand.as_deref().is_some_and(|o| expr_refers_to(o, target))
7666                || branches
7667                    .iter()
7668                    .any(|(w, t)| expr_refers_to(w, target) || expr_refers_to(t, target))
7669                || else_branch
7670                    .as_deref()
7671                    .is_some_and(|e| expr_refers_to(e, target))
7672        }
7673    }
7674}
7675
7676/// v4.22: pick more specific column types from observed rows when
7677/// the projection builder defaulted to Text (the v1.x behavior for
7678/// non-column expressions). Lets `WITH t(n) AS (SELECT 1 ...)`
7679/// land an Int column in the CTE storage table rather than failing
7680/// the insert with "expected TEXT, got INT".
7681fn infer_column_types(columns: &[ColumnSchema], rows: &[Row]) -> Vec<ColumnSchema> {
7682    let mut out = columns.to_vec();
7683    for (col_idx, col) in out.iter_mut().enumerate() {
7684        if col.ty != DataType::Text {
7685            continue;
7686        }
7687        let mut inferred: Option<DataType> = None;
7688        let mut all_null = true;
7689        for row in rows {
7690            let Some(v) = row.values.get(col_idx) else {
7691                continue;
7692            };
7693            let ty = match v {
7694                Value::Null => continue,
7695                Value::SmallInt(_) => DataType::SmallInt,
7696                Value::Int(_) => DataType::Int,
7697                Value::BigInt(_) => DataType::BigInt,
7698                Value::Float(_) => DataType::Float,
7699                Value::Bool(_) => DataType::Bool,
7700                Value::Vector(_) => DataType::Vector {
7701                    dim: 0,
7702                    encoding: VecEncoding::F32,
7703                },
7704                _ => DataType::Text,
7705            };
7706            all_null = false;
7707            inferred = Some(match inferred {
7708                None => ty,
7709                Some(prev) if prev == ty => prev,
7710                Some(_) => DataType::Text,
7711            });
7712        }
7713        if let Some(t) = inferred {
7714            col.ty = t;
7715            col.nullable = true;
7716        } else if all_null {
7717            col.nullable = true;
7718        }
7719    }
7720    out
7721}
7722
7723/// v4.26: render a human-readable plan tree for `EXPLAIN <select>`.
7724/// Lines are pushed into `out`; `depth` controls indentation. We
7725/// describe the rewritten SELECT — what the executor *would* do —
7726/// using the engine handle to spot indexed lookups and table shapes.
7727#[allow(clippy::too_many_lines, clippy::format_push_string)]
7728/// v6.2.4 — Walk every line of the rendered plan tree and append
7729/// per-operator stats. Lines that name a known operator get
7730/// `(rows=N)` (`actual_rows` of the top-level operator equals the
7731/// final result row count; scans report their catalog row count
7732/// as the rows-considered metric). Other lines — Filter / Join /
7733/// GroupBy / OrderBy etc. — are marked `(—)` so the surface is
7734/// complete-by-construction; v6.2.5 fills these in via inline
7735/// executor counters.
7736/// v6.8.3 — surface "CREATE INDEX …" suggestions for every
7737/// `(table, column)` pair the query touches via WHERE / JOIN
7738/// that doesn't already have an index on the owning table.
7739/// Walks the SELECT's FROM clauses + WHERE expression tree;
7740/// returns one line per missing index. Deterministic order:
7741/// FROM-clause iteration order, then column-reference walk
7742/// order inside each WHERE. Each suggestion is a copy-pastable
7743/// DDL string.
7744fn build_index_suggestions(stmt: &SelectStatement, engine: &Engine) -> Vec<String> {
7745    use alloc::collections::BTreeSet;
7746    let mut seen: BTreeSet<(String, String)> = BTreeSet::new();
7747    let mut out: Vec<String> = Vec::new();
7748    let cat = engine.active_catalog();
7749    // Build a (table, qualifier-or-alias) list from the FROM clause
7750    // so unqualified column refs in WHERE resolve to the correct
7751    // table.
7752    let Some(from) = &stmt.from else {
7753        return out;
7754    };
7755    let mut tables: Vec<String> = Vec::new();
7756    tables.push(from.primary.name.clone());
7757    for j in &from.joins {
7758        tables.push(j.table.name.clone());
7759    }
7760    // Collect column refs from the WHERE expression. JOIN ON
7761    // predicates also feed in.
7762    let mut col_refs: Vec<spg_sql::ast::ColumnName> = Vec::new();
7763    if let Some(w) = &stmt.where_ {
7764        collect_column_refs(w, &mut col_refs);
7765    }
7766    for j in &from.joins {
7767        if let Some(on) = &j.on {
7768            collect_column_refs(on, &mut col_refs);
7769        }
7770    }
7771    for cn in &col_refs {
7772        // Resolve owner table: explicit qualifier first, else
7773        // first table in FROM that has a column of this name.
7774        let owner: Option<String> = if let Some(q) = &cn.qualifier {
7775            tables.iter().find(|t| t == &q).cloned()
7776        } else {
7777            tables.iter().find_map(|t| {
7778                cat.get(t).and_then(|tbl| {
7779                    if tbl.schema().column_position(&cn.name).is_some() {
7780                        Some(t.clone())
7781                    } else {
7782                        None
7783                    }
7784                })
7785            })
7786        };
7787        let Some(owner) = owner else {
7788            continue;
7789        };
7790        let Some(tbl) = cat.get(&owner) else {
7791            continue;
7792        };
7793        let Some(col_pos) = tbl.schema().column_position(&cn.name) else {
7794            continue;
7795        };
7796        // Skip if any BTree index already covers this column as
7797        // its key.
7798        let already_indexed = tbl.indices().iter().any(|i| {
7799            matches!(i.kind, spg_storage::IndexKind::BTree(_))
7800                && i.column_position == col_pos
7801                && i.expression.is_none()
7802                && i.partial_predicate.is_none()
7803        });
7804        if already_indexed {
7805            continue;
7806        }
7807        if seen.insert((owner.clone(), cn.name.clone())) {
7808            out.push(alloc::format!(
7809                "SUGGEST: CREATE INDEX ix_{}_{} ON {} ({})",
7810                owner,
7811                cn.name,
7812                owner,
7813                cn.name
7814            ));
7815        }
7816    }
7817    out
7818}
7819
7820/// Walks an `Expr` and pushes every `ColumnName` it references.
7821/// Order is depth-first, left-to-right.
7822fn collect_column_refs(expr: &Expr, out: &mut Vec<spg_sql::ast::ColumnName>) {
7823    match expr {
7824        Expr::Column(cn) => out.push(cn.clone()),
7825        Expr::FunctionCall { args, .. } => {
7826            for a in args {
7827                collect_column_refs(a, out);
7828            }
7829        }
7830        Expr::Binary { lhs, rhs, .. } => {
7831            collect_column_refs(lhs, out);
7832            collect_column_refs(rhs, out);
7833        }
7834        Expr::Unary { expr: e, .. } => collect_column_refs(e, out),
7835        _ => {}
7836    }
7837}
7838
7839fn annotate_explain_lines(lines: &mut [String], total_rows: usize, engine: &Engine) {
7840    let catalog = engine.active_catalog();
7841    let cold_ids = catalog.cold_segment_ids_global();
7842    let any_cold = !cold_ids.is_empty();
7843    let cold_ids_repr = if any_cold {
7844        let mut s = alloc::string::String::from("[");
7845        for (i, id) in cold_ids.iter().enumerate() {
7846            if i > 0 {
7847                s.push(',');
7848            }
7849            s.push_str(&alloc::format!("{id}"));
7850        }
7851        s.push(']');
7852        s
7853    } else {
7854        alloc::string::String::new()
7855    };
7856    for (idx, line) in lines.iter_mut().enumerate() {
7857        let trimmed = line.trim_start();
7858        let is_top_level = idx == 0;
7859        if is_top_level {
7860            line.push_str(&alloc::format!(" (rows={total_rows})"));
7861            continue;
7862        }
7863        if let Some(rest) = trimmed.strip_prefix("From: ") {
7864            let (name, scan_kind) = match rest.split_once(" [") {
7865                Some((n, k)) => (n.trim(), k.trim_end_matches(']')),
7866                None => (rest.trim(), ""),
7867            };
7868            let bare = name.split_whitespace().next().unwrap_or(name);
7869            let hot = catalog.get(bare).map(|t| t.rows().len());
7870            // v6.2.7 — `cold_segments=[id0,id1,…]` enumerates every
7871            // cold-tier segment the scan COULD have walked. v6.2.x
7872            // can tighten to per-table by walking the table's
7873            // BTree-index cold locators.
7874            let annot = match (hot, scan_kind) {
7875                (Some(h), "full scan") => {
7876                    let mut s = alloc::format!(" (hot_rows={h}");
7877                    if any_cold {
7878                        s.push_str(&alloc::format!(
7879                            ", cold_tier=present, cold_segments={cold_ids_repr}"
7880                        ));
7881                    }
7882                    s.push(')');
7883                    s
7884                }
7885                (Some(h), "index seek") => {
7886                    let mut s = alloc::format!(" (hot_rows≤{h}");
7887                    if any_cold {
7888                        s.push_str(&alloc::format!(
7889                            ", cold_tier=present, cold_segments={cold_ids_repr}"
7890                        ));
7891                    }
7892                    s.push(')');
7893                    s
7894                }
7895                _ => " (rows=—)".to_string(),
7896            };
7897            line.push_str(&annot);
7898            continue;
7899        }
7900        // Filter / GroupBy / Having / OrderBy / Limit / Join etc.
7901        line.push_str(" (rows=—)");
7902    }
7903}
7904
7905fn explain_select(stmt: &SelectStatement, engine: &Engine, depth: usize, out: &mut Vec<String>) {
7906    let pad = "  ".repeat(depth);
7907    // 1) Top-level operator label.
7908    let top = if !stmt.ctes.is_empty() {
7909        if stmt.ctes.iter().any(|c| c.recursive) {
7910            "CTEScan (WITH RECURSIVE)"
7911        } else {
7912            "CTEScan (WITH)"
7913        }
7914    } else if !stmt.unions.is_empty() {
7915        "UnionScan"
7916    } else if select_has_window(stmt) {
7917        "WindowAgg"
7918    } else if aggregate::uses_aggregate(stmt) {
7919        "Aggregate"
7920    } else if stmt.distinct {
7921        "Distinct"
7922    } else if stmt.from.is_some() {
7923        "TableScan"
7924    } else {
7925        "Result"
7926    };
7927    out.push(alloc::format!("{pad}{top}"));
7928    let child = "  ".repeat(depth + 1);
7929    // 2) CTE bodies.
7930    for cte in &stmt.ctes {
7931        let head = if cte.recursive {
7932            alloc::format!("{child}CTE (recursive): {}", cte.name)
7933        } else {
7934            alloc::format!("{child}CTE: {}", cte.name)
7935        };
7936        out.push(head);
7937        explain_select(&cte.body, engine, depth + 2, out);
7938    }
7939    // 3) FROM details — primary table + joins, index hits.
7940    if let Some(from) = &stmt.from {
7941        let mut tag = alloc::format!("{child}From: {}", from.primary.name);
7942        if let Some(alias) = &from.primary.alias {
7943            tag.push_str(&alloc::format!(" AS {alias}"));
7944        }
7945        // Try to detect an index-seek opportunity on WHERE against
7946        // the primary table — same heuristic the executor uses.
7947        if let Some(w) = &stmt.where_
7948            && let Some(table) = engine.active_catalog().get(&from.primary.name)
7949        {
7950            let alias = from.primary.alias.as_deref().unwrap_or(&from.primary.name);
7951            let cols = &table.schema().columns;
7952            if try_index_seek(w, cols, engine.active_catalog(), table, alias).is_some() {
7953                tag.push_str(" [index seek]");
7954            } else {
7955                tag.push_str(" [full scan]");
7956            }
7957        } else {
7958            tag.push_str(" [full scan]");
7959        }
7960        out.push(tag);
7961        for j in &from.joins {
7962            let kind = match j.kind {
7963                spg_sql::ast::JoinKind::Inner => "INNER JOIN",
7964                spg_sql::ast::JoinKind::Left => "LEFT JOIN",
7965                spg_sql::ast::JoinKind::Cross => "CROSS JOIN",
7966            };
7967            let mut s = alloc::format!("{child}{kind}: {}", j.table.name);
7968            if let Some(alias) = &j.table.alias {
7969                s.push_str(&alloc::format!(" AS {alias}"));
7970            }
7971            if j.on.is_some() {
7972                s.push_str(" (ON …)");
7973            }
7974            out.push(s);
7975        }
7976    }
7977    // 4) WHERE / GROUP BY / HAVING / ORDER BY / LIMIT / OFFSET.
7978    if let Some(w) = &stmt.where_ {
7979        let mut s = alloc::format!("{child}Filter: {w}");
7980        if expr_has_subquery(w) {
7981            s.push_str(" [subquery]");
7982        }
7983        out.push(s);
7984    }
7985    if let Some(gs) = &stmt.group_by {
7986        let mut parts = Vec::new();
7987        for g in gs {
7988            parts.push(alloc::format!("{g}"));
7989        }
7990        out.push(alloc::format!("{child}GroupBy: {}", parts.join(", ")));
7991    }
7992    if let Some(h) = &stmt.having {
7993        out.push(alloc::format!("{child}Having: {h}"));
7994    }
7995    for o in &stmt.order_by {
7996        let dir = if o.desc { "DESC" } else { "ASC" };
7997        out.push(alloc::format!("{child}OrderBy: {} {dir}", o.expr));
7998    }
7999    if let Some(lim) = stmt.limit {
8000        out.push(alloc::format!("{child}Limit: {lim}"));
8001    }
8002    if let Some(off) = stmt.offset {
8003        out.push(alloc::format!("{child}Offset: {off}"));
8004    }
8005    // 5) Projection — collapse Wildcard or render N items.
8006    if stmt
8007        .items
8008        .iter()
8009        .any(|it| matches!(it, SelectItem::Wildcard))
8010    {
8011        out.push(alloc::format!("{child}Project: *"));
8012    } else {
8013        out.push(alloc::format!(
8014            "{child}Project: {} item(s)",
8015            stmt.items.len()
8016        ));
8017    }
8018    // 6) Recurse into UNION peers.
8019    for (kind, peer) in &stmt.unions {
8020        let label = match kind {
8021            UnionKind::All => "UNION ALL",
8022            UnionKind::Distinct => "UNION",
8023        };
8024        out.push(alloc::format!("{child}{label}"));
8025        explain_select(peer, engine, depth + 2, out);
8026    }
8027}
8028
8029/// v4.23: recognise the engine errors that indicate the inner
8030/// SELECT couldn't be evaluated in isolation because it references
8031/// an outer column — used by `subquery_replacement` to skip
8032/// materialisation and let row-eval handle it instead.
8033fn is_correlation_error(e: &EngineError) -> bool {
8034    matches!(
8035        e,
8036        EngineError::Eval(
8037            eval::EvalError::ColumnNotFound { .. } | eval::EvalError::UnknownQualifier { .. }
8038        )
8039    )
8040}
8041
8042/// v4.23: walk every Expr in `stmt` and replace each Column ref
8043/// that targets the outer scope (qualifier matches the outer
8044/// table alias) with a Literal carrying the outer row's value.
8045/// Conservative: only qualified refs are substituted, so the user
8046/// must write `outer_alias.col` to reference an outer column. This
8047/// matches PG's lexical scoping for correlated subqueries and
8048/// avoids accidentally rebinding inner columns of the same name.
8049fn substitute_outer_columns(stmt: &mut SelectStatement, row: &Row, ctx: &EvalContext<'_>) {
8050    let Some(outer_alias) = ctx.table_alias else {
8051        return;
8052    };
8053    substitute_in_select(stmt, row, ctx, outer_alias);
8054}
8055
8056fn substitute_in_select(
8057    stmt: &mut SelectStatement,
8058    row: &Row,
8059    ctx: &EvalContext<'_>,
8060    outer_alias: &str,
8061) {
8062    for item in &mut stmt.items {
8063        if let SelectItem::Expr { expr, .. } = item {
8064            substitute_in_expr(expr, row, ctx, outer_alias);
8065        }
8066    }
8067    if let Some(w) = &mut stmt.where_ {
8068        substitute_in_expr(w, row, ctx, outer_alias);
8069    }
8070    if let Some(gs) = &mut stmt.group_by {
8071        for g in gs {
8072            substitute_in_expr(g, row, ctx, outer_alias);
8073        }
8074    }
8075    if let Some(h) = &mut stmt.having {
8076        substitute_in_expr(h, row, ctx, outer_alias);
8077    }
8078    for o in &mut stmt.order_by {
8079        substitute_in_expr(&mut o.expr, row, ctx, outer_alias);
8080    }
8081    for (_, peer) in &mut stmt.unions {
8082        substitute_in_select(peer, row, ctx, outer_alias);
8083    }
8084}
8085
8086fn substitute_in_expr(e: &mut Expr, row: &Row, ctx: &EvalContext<'_>, outer_alias: &str) {
8087    if let Expr::Column(c) = e
8088        && let Some(qual) = &c.qualifier
8089        && qual.eq_ignore_ascii_case(outer_alias)
8090    {
8091        // Look up the column's index in the outer schema.
8092        if let Some(idx) = ctx
8093            .columns
8094            .iter()
8095            .position(|sc| sc.name.eq_ignore_ascii_case(&c.name))
8096        {
8097            let v = row.values.get(idx).cloned().unwrap_or(Value::Null);
8098            if let Ok(lit) = value_to_literal_expr(v) {
8099                *e = lit;
8100                return;
8101            }
8102        }
8103    }
8104    match e {
8105        Expr::Binary { lhs, rhs, .. } => {
8106            substitute_in_expr(lhs, row, ctx, outer_alias);
8107            substitute_in_expr(rhs, row, ctx, outer_alias);
8108        }
8109        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
8110            substitute_in_expr(expr, row, ctx, outer_alias);
8111        }
8112        Expr::Like { expr, pattern, .. } => {
8113            substitute_in_expr(expr, row, ctx, outer_alias);
8114            substitute_in_expr(pattern, row, ctx, outer_alias);
8115        }
8116        Expr::FunctionCall { args, .. } => {
8117            for a in args {
8118                substitute_in_expr(a, row, ctx, outer_alias);
8119            }
8120        }
8121        Expr::Extract { source, .. } => substitute_in_expr(source, row, ctx, outer_alias),
8122        Expr::WindowFunction {
8123            args,
8124            partition_by,
8125            order_by,
8126            ..
8127        } => {
8128            for a in args {
8129                substitute_in_expr(a, row, ctx, outer_alias);
8130            }
8131            for p in partition_by {
8132                substitute_in_expr(p, row, ctx, outer_alias);
8133            }
8134            for (o, _) in order_by {
8135                substitute_in_expr(o, row, ctx, outer_alias);
8136            }
8137        }
8138        Expr::ScalarSubquery(s) => substitute_in_select(s, row, ctx, outer_alias),
8139        Expr::Exists { subquery, .. } | Expr::InSubquery { subquery, .. } => {
8140            substitute_in_select(subquery, row, ctx, outer_alias);
8141        }
8142        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => {}
8143        Expr::Array(items) => {
8144            for elem in items {
8145                substitute_in_expr(elem, row, ctx, outer_alias);
8146            }
8147        }
8148        Expr::ArraySubscript { target, index } => {
8149            substitute_in_expr(target, row, ctx, outer_alias);
8150            substitute_in_expr(index, row, ctx, outer_alias);
8151        }
8152        Expr::AnyAll { expr, array, .. } => {
8153            substitute_in_expr(expr, row, ctx, outer_alias);
8154            substitute_in_expr(array, row, ctx, outer_alias);
8155        }
8156        Expr::Case {
8157            operand,
8158            branches,
8159            else_branch,
8160        } => {
8161            if let Some(o) = operand {
8162                substitute_in_expr(o, row, ctx, outer_alias);
8163            }
8164            for (w, t) in branches {
8165                substitute_in_expr(w, row, ctx, outer_alias);
8166                substitute_in_expr(t, row, ctx, outer_alias);
8167            }
8168            if let Some(e) = else_branch {
8169                substitute_in_expr(e, row, ctx, outer_alias);
8170            }
8171        }
8172    }
8173}
8174
8175/// v4.22: encode a Row to a comparable byte key for UNION-DISTINCT
8176/// dedup inside the recursive iteration. Crude but deterministic
8177/// — Debug prints embed type discriminants so NULL ≠ "" ≠ 0.
8178fn encode_row_key(row: &Row) -> Vec<u8> {
8179    let mut out = Vec::new();
8180    for v in &row.values {
8181        let s = alloc::format!("{v:?}|");
8182        out.extend_from_slice(s.as_bytes());
8183    }
8184    out
8185}
8186
8187fn select_has_window(stmt: &SelectStatement) -> bool {
8188    for item in &stmt.items {
8189        if let SelectItem::Expr { expr, .. } = item
8190            && expr_has_window(expr)
8191        {
8192            return true;
8193        }
8194    }
8195    false
8196}
8197
8198fn expr_has_window(e: &Expr) -> bool {
8199    match e {
8200        Expr::WindowFunction { .. } => true,
8201        Expr::Binary { lhs, rhs, .. } => expr_has_window(lhs) || expr_has_window(rhs),
8202        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
8203            expr_has_window(expr)
8204        }
8205        Expr::FunctionCall { args, .. } => args.iter().any(expr_has_window),
8206        Expr::Like { expr, pattern, .. } => expr_has_window(expr) || expr_has_window(pattern),
8207        Expr::Extract { source, .. } => expr_has_window(source),
8208        Expr::ScalarSubquery(_)
8209        | Expr::Exists { .. }
8210        | Expr::InSubquery { .. }
8211        | Expr::Literal(_)
8212        | Expr::Placeholder(_)
8213        | Expr::Column(_) => false,
8214        Expr::Array(items) => items.iter().any(expr_has_window),
8215        Expr::ArraySubscript { target, index } => expr_has_window(target) || expr_has_window(index),
8216        Expr::AnyAll { expr, array, .. } => expr_has_window(expr) || expr_has_window(array),
8217        Expr::Case {
8218            operand,
8219            branches,
8220            else_branch,
8221        } => {
8222            operand.as_deref().is_some_and(expr_has_window)
8223                || branches
8224                    .iter()
8225                    .any(|(w, t)| expr_has_window(w) || expr_has_window(t))
8226                || else_branch.as_deref().is_some_and(expr_has_window)
8227        }
8228    }
8229}
8230
8231fn collect_window_nodes(e: &Expr, out: &mut Vec<Expr>) {
8232    if let Expr::WindowFunction { .. } = e {
8233        // Deduplicate by structural equality on the expression
8234        // (cheap because window args + partition + order are
8235        // small). Without dedup we'd recompute identical windows
8236        // once per occurrence in the projection.
8237        if !out.iter().any(|x| x == e) {
8238            out.push(e.clone());
8239        }
8240        return;
8241    }
8242    match e {
8243        // Already handled by the early-return at the top.
8244        Expr::WindowFunction { .. } => unreachable!(),
8245        Expr::Binary { lhs, rhs, .. } => {
8246            collect_window_nodes(lhs, out);
8247            collect_window_nodes(rhs, out);
8248        }
8249        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
8250            collect_window_nodes(expr, out);
8251        }
8252        Expr::FunctionCall { args, .. } => {
8253            for a in args {
8254                collect_window_nodes(a, out);
8255            }
8256        }
8257        Expr::Like { expr, pattern, .. } => {
8258            collect_window_nodes(expr, out);
8259            collect_window_nodes(pattern, out);
8260        }
8261        Expr::Extract { source, .. } => collect_window_nodes(source, out),
8262        _ => {}
8263    }
8264}
8265
8266fn rewrite_window_to_columns(e: &mut Expr, window_nodes: &[Expr]) {
8267    if let Expr::WindowFunction { .. } = e
8268        && let Some(idx) = window_nodes.iter().position(|w| w == e)
8269    {
8270        *e = Expr::Column(spg_sql::ast::ColumnName {
8271            qualifier: None,
8272            name: alloc::format!("__win_{idx}"),
8273        });
8274        return;
8275    }
8276    match e {
8277        Expr::Binary { lhs, rhs, .. } => {
8278            rewrite_window_to_columns(lhs, window_nodes);
8279            rewrite_window_to_columns(rhs, window_nodes);
8280        }
8281        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
8282            rewrite_window_to_columns(expr, window_nodes);
8283        }
8284        Expr::FunctionCall { args, .. } => {
8285            for a in args {
8286                rewrite_window_to_columns(a, window_nodes);
8287            }
8288        }
8289        Expr::Like { expr, pattern, .. } => {
8290            rewrite_window_to_columns(expr, window_nodes);
8291            rewrite_window_to_columns(pattern, window_nodes);
8292        }
8293        Expr::Extract { source, .. } => rewrite_window_to_columns(source, window_nodes),
8294        _ => {}
8295    }
8296}
8297
8298/// Total order over partition-key tuples. NULL sorts as the
8299/// lowest value (matches the `<` partial order's NULL-last
8300/// behaviour with `INFINITY` flipped).
8301fn partition_key_cmp(a: &[Value], b: &[Value]) -> core::cmp::Ordering {
8302    for (x, y) in a.iter().zip(b.iter()) {
8303        let c = value_cmp(x, y);
8304        if c != core::cmp::Ordering::Equal {
8305            return c;
8306        }
8307    }
8308    a.len().cmp(&b.len())
8309}
8310
8311fn order_key_cmp(a: &[(Value, bool)], b: &[(Value, bool)]) -> core::cmp::Ordering {
8312    for ((va, desc), (vb, _)) in a.iter().zip(b.iter()) {
8313        let c = value_cmp(va, vb);
8314        let c = if *desc { c.reverse() } else { c };
8315        if c != core::cmp::Ordering::Equal {
8316            return c;
8317        }
8318    }
8319    a.len().cmp(&b.len())
8320}
8321
8322#[allow(clippy::match_same_arms)] // explicit arms per type document the supported pairs
8323fn value_cmp(a: &Value, b: &Value) -> core::cmp::Ordering {
8324    use core::cmp::Ordering;
8325    match (a, b) {
8326        (Value::Null, Value::Null) => Ordering::Equal,
8327        (Value::Null, _) => Ordering::Less,
8328        (_, Value::Null) => Ordering::Greater,
8329        (Value::Int(x), Value::Int(y)) => x.cmp(y),
8330        (Value::BigInt(x), Value::BigInt(y)) => x.cmp(y),
8331        (Value::SmallInt(x), Value::SmallInt(y)) => x.cmp(y),
8332        (Value::Text(x), Value::Text(y)) => x.cmp(y),
8333        (Value::Bool(x), Value::Bool(y)) => x.cmp(y),
8334        (Value::Float(x), Value::Float(y)) => x.partial_cmp(y).unwrap_or(Ordering::Equal),
8335        (Value::Date(x), Value::Date(y)) => x.cmp(y),
8336        (Value::Timestamp(x), Value::Timestamp(y)) => x.cmp(y),
8337        // Cross-type compare: fall back to the debug rendering —
8338        // same-partition is the goal, exact order is irrelevant.
8339        _ => alloc::format!("{a:?}").cmp(&alloc::format!("{b:?}")),
8340    }
8341}
8342
8343/// Compute the window function's per-row output for one partition.
8344/// `slice` has (partition key, order key, original-row-index)
8345/// tuples already sorted by order key. `filtered_rows` is the
8346/// full row list indexed by original-row-index. `out_vals` is
8347/// the destination, also indexed by original-row-index.
8348#[allow(
8349    clippy::too_many_arguments,
8350    clippy::cast_possible_truncation,
8351    clippy::cast_possible_wrap,
8352    clippy::cast_precision_loss,
8353    clippy::cast_sign_loss,
8354    clippy::doc_markdown,
8355    clippy::too_many_lines,
8356    clippy::type_complexity,
8357    clippy::match_same_arms
8358)]
8359fn compute_window_partition(
8360    name: &str,
8361    args: &[Expr],
8362    ordered: bool,
8363    frame: Option<&WindowFrame>,
8364    null_treatment: spg_sql::ast::NullTreatment,
8365    slice: &[(Vec<Value>, Vec<(Value, bool)>, usize)],
8366    filtered_rows: &[&Row],
8367    ctx: &EvalContext<'_>,
8368    out_vals: &mut [Value],
8369) -> Result<(), EngineError> {
8370    let ignore_nulls = matches!(null_treatment, spg_sql::ast::NullTreatment::Ignore);
8371    let lower = name.to_ascii_lowercase();
8372    match lower.as_str() {
8373        "row_number" => {
8374            for (rank, (_, _, idx)) in slice.iter().enumerate() {
8375                out_vals[*idx] = Value::BigInt((rank + 1) as i64);
8376            }
8377            Ok(())
8378        }
8379        "rank" => {
8380            let mut prev_key: Option<&[(Value, bool)]> = None;
8381            let mut current_rank: i64 = 1;
8382            for (i, (_, okey, idx)) in slice.iter().enumerate() {
8383                if let Some(p) = prev_key
8384                    && order_key_cmp(p, okey) != core::cmp::Ordering::Equal
8385                {
8386                    current_rank = (i + 1) as i64;
8387                }
8388                if prev_key.is_none() {
8389                    current_rank = 1;
8390                }
8391                out_vals[*idx] = Value::BigInt(current_rank);
8392                prev_key = Some(okey.as_slice());
8393            }
8394            Ok(())
8395        }
8396        "dense_rank" => {
8397            let mut prev_key: Option<&[(Value, bool)]> = None;
8398            let mut current_rank: i64 = 0;
8399            for (_, okey, idx) in slice {
8400                if prev_key.is_none_or(|p| order_key_cmp(p, okey) != core::cmp::Ordering::Equal) {
8401                    current_rank += 1;
8402                }
8403                out_vals[*idx] = Value::BigInt(current_rank);
8404                prev_key = Some(okey.as_slice());
8405            }
8406            Ok(())
8407        }
8408        "sum" | "avg" | "min" | "max" | "count" | "count_star" => {
8409            // Pre-evaluate the function arg per row in the slice
8410            // (count_star has no arg).
8411            let arg_values: Vec<Value> = if lower == "count_star" || args.is_empty() {
8412                slice.iter().map(|_| Value::Null).collect()
8413            } else {
8414                slice
8415                    .iter()
8416                    .map(|(_, _, idx)| eval::eval_expr(&args[0], filtered_rows[*idx], ctx))
8417                    .collect::<Result<_, _>>()
8418                    .map_err(EngineError::Eval)?
8419            };
8420            // v4.20: pick the effective frame. Explicit frame
8421            // overrides the implicit default (running for ordered,
8422            // whole-partition for unordered).
8423            let eff = effective_frame(frame, ordered)?;
8424            #[allow(clippy::needless_range_loop)]
8425            for i in 0..slice.len() {
8426                let (lo, hi) = frame_bounds_for_row(&eff, i, slice);
8427                let mut sum: f64 = 0.0;
8428                let mut count: i64 = 0;
8429                let mut min_v: Option<f64> = None;
8430                let mut max_v: Option<f64> = None;
8431                let mut row_count: i64 = 0;
8432                if lo <= hi {
8433                    for j in lo..=hi {
8434                        let v = &arg_values[j];
8435                        match lower.as_str() {
8436                            "count_star" => row_count += 1,
8437                            "count" => {
8438                                if !v.is_null() {
8439                                    count += 1;
8440                                }
8441                            }
8442                            _ => {
8443                                if let Some(x) = value_to_f64(v) {
8444                                    sum += x;
8445                                    count += 1;
8446                                    min_v = Some(min_v.map_or(x, |m| m.min(x)));
8447                                    max_v = Some(max_v.map_or(x, |m| m.max(x)));
8448                                }
8449                            }
8450                        }
8451                    }
8452                }
8453                let value = match lower.as_str() {
8454                    "count_star" => Value::BigInt(row_count),
8455                    "count" => Value::BigInt(count),
8456                    "sum" => Value::Float(sum),
8457                    "avg" => {
8458                        if count == 0 {
8459                            Value::Null
8460                        } else {
8461                            Value::Float(sum / count as f64)
8462                        }
8463                    }
8464                    "min" => min_v.map_or(Value::Null, Value::Float),
8465                    "max" => max_v.map_or(Value::Null, Value::Float),
8466                    _ => unreachable!(),
8467                };
8468                let (_, _, idx) = &slice[i];
8469                out_vals[*idx] = value;
8470            }
8471            Ok(())
8472        }
8473        "lag" | "lead" => {
8474            // lag(expr [, offset [, default]])
8475            // lead(expr [, offset [, default]])
8476            if args.is_empty() {
8477                return Err(EngineError::Unsupported(alloc::format!(
8478                    "{lower}() requires at least one argument"
8479                )));
8480            }
8481            let offset: i64 = if args.len() >= 2 {
8482                let v = eval::eval_expr(&args[1], filtered_rows[slice[0].2], ctx)
8483                    .map_err(EngineError::Eval)?;
8484                match v {
8485                    Value::SmallInt(n) => i64::from(n),
8486                    Value::Int(n) => i64::from(n),
8487                    Value::BigInt(n) => n,
8488                    _ => {
8489                        return Err(EngineError::Unsupported(alloc::format!(
8490                            "{lower}() offset must be integer"
8491                        )));
8492                    }
8493                }
8494            } else {
8495                1
8496            };
8497            let default: Value = if args.len() >= 3 {
8498                eval::eval_expr(&args[2], filtered_rows[slice[0].2], ctx)
8499                    .map_err(EngineError::Eval)?
8500            } else {
8501                Value::Null
8502            };
8503            let values: Vec<Value> = slice
8504                .iter()
8505                .map(|(_, _, idx)| eval::eval_expr(&args[0], filtered_rows[*idx], ctx))
8506                .collect::<Result<_, _>>()
8507                .map_err(EngineError::Eval)?;
8508            let n = slice.len();
8509            for (i, (_, _, idx)) in slice.iter().enumerate() {
8510                let signed_offset = if lower == "lag" { -offset } else { offset };
8511                let v = if ignore_nulls {
8512                    // v6.4.2 — IGNORE NULLS: walk in the offset direction
8513                    // skipping NULL values; the `offset`-th non-NULL
8514                    // encountered is the result.
8515                    let step: i64 = if signed_offset >= 0 { 1 } else { -1 };
8516                    let needed: i64 = signed_offset.abs();
8517                    if needed == 0 {
8518                        values[i].clone()
8519                    } else {
8520                        let mut j: i64 = i as i64;
8521                        let mut hits: i64 = 0;
8522                        let mut found: Option<Value> = None;
8523                        loop {
8524                            j += step;
8525                            if j < 0 || j >= n as i64 {
8526                                break;
8527                            }
8528                            #[allow(clippy::cast_sign_loss)]
8529                            let v = &values[j as usize];
8530                            if !v.is_null() {
8531                                hits += 1;
8532                                if hits == needed {
8533                                    found = Some(v.clone());
8534                                    break;
8535                                }
8536                            }
8537                        }
8538                        found.unwrap_or_else(|| default.clone())
8539                    }
8540                } else {
8541                    let target_signed = i64::try_from(i).unwrap_or(i64::MAX) + signed_offset;
8542                    if target_signed < 0 || target_signed >= i64::try_from(n).unwrap_or(i64::MAX) {
8543                        default.clone()
8544                    } else {
8545                        #[allow(clippy::cast_sign_loss)]
8546                        {
8547                            values[target_signed as usize].clone()
8548                        }
8549                    }
8550                };
8551                out_vals[*idx] = v;
8552            }
8553            Ok(())
8554        }
8555        "first_value" | "last_value" | "nth_value" => {
8556            if args.is_empty() {
8557                return Err(EngineError::Unsupported(alloc::format!(
8558                    "{lower}() requires at least one argument"
8559                )));
8560            }
8561            let values: Vec<Value> = slice
8562                .iter()
8563                .map(|(_, _, idx)| eval::eval_expr(&args[0], filtered_rows[*idx], ctx))
8564                .collect::<Result<_, _>>()
8565                .map_err(EngineError::Eval)?;
8566            let nth: usize = if lower == "nth_value" {
8567                if args.len() < 2 {
8568                    return Err(EngineError::Unsupported(
8569                        "nth_value() requires (expr, n)".into(),
8570                    ));
8571                }
8572                let v = eval::eval_expr(&args[1], filtered_rows[slice[0].2], ctx)
8573                    .map_err(EngineError::Eval)?;
8574                let raw = match v {
8575                    Value::SmallInt(n) => i64::from(n),
8576                    Value::Int(n) => i64::from(n),
8577                    Value::BigInt(n) => n,
8578                    _ => {
8579                        return Err(EngineError::Unsupported(
8580                            "nth_value() n must be integer".into(),
8581                        ));
8582                    }
8583                };
8584                if raw < 1 {
8585                    return Err(EngineError::Unsupported(
8586                        "nth_value() n must be >= 1".into(),
8587                    ));
8588                }
8589                #[allow(clippy::cast_sign_loss)]
8590                {
8591                    raw as usize
8592                }
8593            } else {
8594                0
8595            };
8596            let eff = effective_frame(frame, ordered)?;
8597            for i in 0..slice.len() {
8598                let (lo, hi) = frame_bounds_for_row(&eff, i, slice);
8599                let (_, _, idx) = &slice[i];
8600                let v = if lo > hi {
8601                    Value::Null
8602                } else if ignore_nulls && matches!(lower.as_str(), "first_value" | "last_value") {
8603                    // v6.4.2 — IGNORE NULLS: skip NULL cells when
8604                    // selecting the boundary value within the frame.
8605                    if lower == "first_value" {
8606                        (lo..=hi)
8607                            .find_map(|j| {
8608                                let v = &values[j];
8609                                (!v.is_null()).then(|| v.clone())
8610                            })
8611                            .unwrap_or(Value::Null)
8612                    } else {
8613                        (lo..=hi)
8614                            .rev()
8615                            .find_map(|j| {
8616                                let v = &values[j];
8617                                (!v.is_null()).then(|| v.clone())
8618                            })
8619                            .unwrap_or(Value::Null)
8620                    }
8621                } else {
8622                    match lower.as_str() {
8623                        "first_value" => values[lo].clone(),
8624                        "last_value" => values[hi].clone(),
8625                        "nth_value" => {
8626                            let pos = lo + nth - 1;
8627                            if pos > hi {
8628                                Value::Null
8629                            } else {
8630                                values[pos].clone()
8631                            }
8632                        }
8633                        _ => unreachable!(),
8634                    }
8635                };
8636                out_vals[*idx] = v;
8637            }
8638            Ok(())
8639        }
8640        "ntile" => {
8641            if args.is_empty() {
8642                return Err(EngineError::Unsupported(
8643                    "ntile(n) requires an integer argument".into(),
8644                ));
8645            }
8646            let v = eval::eval_expr(&args[0], filtered_rows[slice[0].2], ctx)
8647                .map_err(EngineError::Eval)?;
8648            let bucket_count: i64 = match v {
8649                Value::SmallInt(n) => i64::from(n),
8650                Value::Int(n) => i64::from(n),
8651                Value::BigInt(n) => n,
8652                _ => {
8653                    return Err(EngineError::Unsupported(
8654                        "ntile() argument must be integer".into(),
8655                    ));
8656                }
8657            };
8658            if bucket_count < 1 {
8659                return Err(EngineError::Unsupported(
8660                    "ntile() argument must be >= 1".into(),
8661                ));
8662            }
8663            #[allow(clippy::cast_sign_loss)]
8664            let buckets = bucket_count as usize;
8665            let n = slice.len();
8666            // Each bucket gets `base` rows; the first `extras` buckets
8667            // get one extra. PG semantics.
8668            let base = n / buckets;
8669            let extras = n % buckets;
8670            let mut bucket: usize = 1;
8671            let mut remaining_in_bucket = if extras > 0 { base + 1 } else { base };
8672            let mut buckets_with_extra_remaining = extras;
8673            for (_, _, idx) in slice {
8674                if remaining_in_bucket == 0 {
8675                    bucket += 1;
8676                    buckets_with_extra_remaining = buckets_with_extra_remaining.saturating_sub(1);
8677                    remaining_in_bucket = if buckets_with_extra_remaining > 0 {
8678                        base + 1
8679                    } else {
8680                        base
8681                    };
8682                    // Edge: if base==0 and extras==0, all rows fit;
8683                    // shouldn't reach here, but guard anyway.
8684                    if remaining_in_bucket == 0 {
8685                        remaining_in_bucket = 1;
8686                    }
8687                }
8688                out_vals[*idx] = Value::BigInt(i64::try_from(bucket).unwrap_or(i64::MAX));
8689                remaining_in_bucket -= 1;
8690            }
8691            Ok(())
8692        }
8693        "percent_rank" => {
8694            // (rank - 1) / (n - 1) where rank is the standard RANK().
8695            // Single-row partitions get 0.
8696            let n = slice.len();
8697            let mut prev_key: Option<&[(Value, bool)]> = None;
8698            let mut current_rank: i64 = 1;
8699            for (i, (_, okey, idx)) in slice.iter().enumerate() {
8700                if let Some(p) = prev_key
8701                    && order_key_cmp(p, okey) != core::cmp::Ordering::Equal
8702                {
8703                    current_rank = i64::try_from(i + 1).unwrap_or(i64::MAX);
8704                }
8705                if prev_key.is_none() {
8706                    current_rank = 1;
8707                }
8708                #[allow(clippy::cast_precision_loss)]
8709                let pr = if n <= 1 {
8710                    0.0
8711                } else {
8712                    (current_rank - 1) as f64 / (n - 1) as f64
8713                };
8714                out_vals[*idx] = Value::Float(pr);
8715                prev_key = Some(okey.as_slice());
8716            }
8717            Ok(())
8718        }
8719        "cume_dist" => {
8720            // # rows up to and including this row's peer group / n.
8721            let n = slice.len();
8722            // First pass: find peer-group-end rank for each row.
8723            for i in 0..slice.len() {
8724                let peer_end = peer_group_end(slice, i);
8725                #[allow(clippy::cast_precision_loss)]
8726                let cd = (peer_end + 1) as f64 / n as f64;
8727                let (_, _, idx) = &slice[i];
8728                out_vals[*idx] = Value::Float(cd);
8729            }
8730            Ok(())
8731        }
8732        other => Err(EngineError::Unsupported(alloc::format!(
8733            "window function {other:?} not supported (v4.21: row_number/rank/dense_rank/sum/avg/count/min/max/lag/lead/first_value/last_value/nth_value/ntile/percent_rank/cume_dist)"
8734        ))),
8735    }
8736}
8737
8738/// v4.20: resolve the user-provided frame down to a normalised
8739/// `(kind, start, end)`. `None` means default — derive from
8740/// `ordered`: ordered ⇒ RANGE UNBOUNDED PRECEDING AND CURRENT ROW,
8741/// unordered ⇒ ROWS UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING.
8742/// Single-bound shorthand (e.g. `ROWS 5 PRECEDING`) normalises
8743/// end → CURRENT ROW per the PG spec.
8744fn effective_frame(
8745    frame: Option<&WindowFrame>,
8746    ordered: bool,
8747) -> Result<(FrameKind, FrameBound, FrameBound), EngineError> {
8748    match frame {
8749        None => {
8750            if ordered {
8751                Ok((
8752                    FrameKind::Range,
8753                    FrameBound::UnboundedPreceding,
8754                    FrameBound::CurrentRow,
8755                ))
8756            } else {
8757                Ok((
8758                    FrameKind::Rows,
8759                    FrameBound::UnboundedPreceding,
8760                    FrameBound::UnboundedFollowing,
8761                ))
8762            }
8763        }
8764        Some(fr) => {
8765            let end = fr.end.clone().unwrap_or(FrameBound::CurrentRow);
8766            // Reject start > end (a few impossible combinations).
8767            if matches!(fr.start, FrameBound::UnboundedFollowing)
8768                || matches!(end, FrameBound::UnboundedPreceding)
8769            {
8770                return Err(EngineError::Unsupported(alloc::format!(
8771                    "invalid frame: start={:?} end={:?}",
8772                    fr.start,
8773                    end
8774                )));
8775            }
8776            // RANGE OFFSET PRECEDING / FOLLOWING needs value-typed
8777            // arithmetic on the ORDER BY key (e.g. `RANGE BETWEEN
8778            // INTERVAL '1 day' PRECEDING AND CURRENT ROW`). Not
8779            // implemented in v4.20.
8780            if fr.kind == FrameKind::Range
8781                && (matches!(
8782                    fr.start,
8783                    FrameBound::OffsetPreceding(_) | FrameBound::OffsetFollowing(_)
8784                ) || matches!(
8785                    end,
8786                    FrameBound::OffsetPreceding(_) | FrameBound::OffsetFollowing(_)
8787                ))
8788            {
8789                return Err(EngineError::Unsupported(
8790                    "RANGE with explicit offset bounds is not supported (v4.20: only UNBOUNDED / CURRENT ROW for RANGE)".into(),
8791                ));
8792            }
8793            Ok((fr.kind, fr.start.clone(), end))
8794        }
8795    }
8796}
8797
8798/// Compute `(lo, hi)` row-index bounds inside the partition slice
8799/// for the row at position `i`. Inclusive, clamped to
8800/// `[0, slice.len()-1]`. Empty result if `lo > hi`.
8801#[allow(clippy::type_complexity)]
8802fn frame_bounds_for_row(
8803    eff: &(FrameKind, FrameBound, FrameBound),
8804    i: usize,
8805    slice: &[(Vec<Value>, Vec<(Value, bool)>, usize)],
8806) -> (usize, usize) {
8807    let (kind, start, end) = eff;
8808    let n = slice.len();
8809    let last = n.saturating_sub(1);
8810    let (mut lo, mut hi) = match kind {
8811        FrameKind::Rows => {
8812            let lo = match start {
8813                FrameBound::UnboundedPreceding => 0,
8814                FrameBound::OffsetPreceding(k) => {
8815                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
8816                    i.saturating_sub(k)
8817                }
8818                FrameBound::CurrentRow => i,
8819                FrameBound::OffsetFollowing(k) => {
8820                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
8821                    i.saturating_add(k).min(last)
8822                }
8823                FrameBound::UnboundedFollowing => last,
8824            };
8825            let hi = match end {
8826                FrameBound::UnboundedPreceding => 0,
8827                FrameBound::OffsetPreceding(k) => {
8828                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
8829                    i.saturating_sub(k)
8830                }
8831                FrameBound::CurrentRow => i,
8832                FrameBound::OffsetFollowing(k) => {
8833                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
8834                    i.saturating_add(k).min(last)
8835                }
8836                FrameBound::UnboundedFollowing => last,
8837            };
8838            (lo, hi)
8839        }
8840        FrameKind::Range => {
8841            // RANGE bounds are peer-aware. With only UNBOUNDED and
8842            // CURRENT ROW supported (rejected at effective_frame for
8843            // explicit offsets), the start/end map to the
8844            // partition's full extent at the same-order-key peer
8845            // group boundary.
8846            let lo = match start {
8847                FrameBound::UnboundedPreceding => 0,
8848                FrameBound::CurrentRow => peer_group_start(slice, i),
8849                FrameBound::UnboundedFollowing => last,
8850                _ => unreachable!("offset bounds rejected for RANGE"),
8851            };
8852            let hi = match end {
8853                FrameBound::UnboundedPreceding => 0,
8854                FrameBound::CurrentRow => peer_group_end(slice, i),
8855                FrameBound::UnboundedFollowing => last,
8856                _ => unreachable!("offset bounds rejected for RANGE"),
8857            };
8858            (lo, hi)
8859        }
8860    };
8861    if hi >= n {
8862        hi = last;
8863    }
8864    if lo >= n {
8865        lo = last;
8866    }
8867    (lo, hi)
8868}
8869
8870/// Find the inclusive index of the first row with the same ORDER
8871/// BY key as `slice[i]`. Slice is already sorted by partition then
8872/// order, so peers are contiguous.
8873#[allow(clippy::type_complexity)]
8874fn peer_group_start(slice: &[(Vec<Value>, Vec<(Value, bool)>, usize)], i: usize) -> usize {
8875    let key = &slice[i].1;
8876    let mut j = i;
8877    while j > 0 && order_key_cmp(&slice[j - 1].1, key) == core::cmp::Ordering::Equal {
8878        j -= 1;
8879    }
8880    j
8881}
8882
8883/// Find the inclusive index of the last row with the same ORDER
8884/// BY key as `slice[i]`.
8885#[allow(clippy::type_complexity)]
8886fn peer_group_end(slice: &[(Vec<Value>, Vec<(Value, bool)>, usize)], i: usize) -> usize {
8887    let key = &slice[i].1;
8888    let mut j = i;
8889    while j + 1 < slice.len() && order_key_cmp(&slice[j + 1].1, key) == core::cmp::Ordering::Equal {
8890        j += 1;
8891    }
8892    j
8893}
8894
8895fn value_to_f64(v: &Value) -> Option<f64> {
8896    match v {
8897        Value::SmallInt(n) => Some(f64::from(*n)),
8898        Value::Int(n) => Some(f64::from(*n)),
8899        #[allow(clippy::cast_precision_loss)]
8900        Value::BigInt(n) => Some(*n as f64),
8901        Value::Float(x) => Some(*x),
8902        _ => None,
8903    }
8904}
8905
8906/// Quick scan for any subquery-bearing node in a SELECT's WHERE /
8907/// projection / `order_by` — saves cloning the AST when there are
8908/// none (the common case).
8909fn expr_tree_has_subquery(stmt: &SelectStatement) -> bool {
8910    let mut any = false;
8911    for item in &stmt.items {
8912        if let SelectItem::Expr { expr, .. } = item {
8913            any = any || expr_has_subquery(expr);
8914        }
8915    }
8916    if let Some(w) = &stmt.where_ {
8917        any = any || expr_has_subquery(w);
8918    }
8919    if let Some(h) = &stmt.having {
8920        any = any || expr_has_subquery(h);
8921    }
8922    for o in &stmt.order_by {
8923        any = any || expr_has_subquery(&o.expr);
8924    }
8925    for (_, peer) in &stmt.unions {
8926        any = any || expr_tree_has_subquery(peer);
8927    }
8928    any
8929}
8930
8931fn expr_has_subquery(e: &Expr) -> bool {
8932    match e {
8933        Expr::ScalarSubquery(_) | Expr::Exists { .. } | Expr::InSubquery { .. } => true,
8934        Expr::Binary { lhs, rhs, .. } => expr_has_subquery(lhs) || expr_has_subquery(rhs),
8935        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
8936            expr_has_subquery(expr)
8937        }
8938        Expr::FunctionCall { args, .. } => args.iter().any(expr_has_subquery),
8939        Expr::Like { expr, pattern, .. } => expr_has_subquery(expr) || expr_has_subquery(pattern),
8940        Expr::Extract { source, .. } => expr_has_subquery(source),
8941        Expr::WindowFunction {
8942            args,
8943            partition_by,
8944            order_by,
8945            ..
8946        } => {
8947            args.iter().any(expr_has_subquery)
8948                || partition_by.iter().any(expr_has_subquery)
8949                || order_by.iter().any(|(e, _)| expr_has_subquery(e))
8950        }
8951        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => false,
8952        Expr::Array(items) => items.iter().any(expr_has_subquery),
8953        Expr::ArraySubscript { target, index } => {
8954            expr_has_subquery(target) || expr_has_subquery(index)
8955        }
8956        Expr::AnyAll { expr, array, .. } => expr_has_subquery(expr) || expr_has_subquery(array),
8957        Expr::Case {
8958            operand,
8959            branches,
8960            else_branch,
8961        } => {
8962            operand.as_deref().is_some_and(expr_has_subquery)
8963                || branches
8964                    .iter()
8965                    .any(|(w, t)| expr_has_subquery(w) || expr_has_subquery(t))
8966                || else_branch.as_deref().is_some_and(expr_has_subquery)
8967        }
8968    }
8969}
8970
8971/// v4.10 helper: materialise a runtime `Value` back into an AST
8972/// `Expr::Literal` for the subquery-rewrite path. Supports the
8973/// types `Literal` can represent (Integer / Float / Text / Bool /
8974/// Null). Date / Timestamp / Numeric / Vector / Interval / JSON
8975/// would lose precision through Literal and aren't supported in
8976/// uncorrelated-subquery results; they error with a clear hint.
8977fn value_to_literal_expr(v: Value) -> Result<Expr, EngineError> {
8978    let lit = match v {
8979        Value::Null => Literal::Null,
8980        Value::SmallInt(n) => Literal::Integer(i64::from(n)),
8981        Value::Int(n) => Literal::Integer(i64::from(n)),
8982        Value::BigInt(n) => Literal::Integer(n),
8983        Value::Float(x) => Literal::Float(x),
8984        Value::Text(s) | Value::Json(s) => Literal::String(s),
8985        Value::Bool(b) => Literal::Bool(b),
8986        other => {
8987            return Err(EngineError::Unsupported(alloc::format!(
8988                "subquery result type {:?} not yet materialisable; cast to text or integer in the inner SELECT",
8989                other.data_type()
8990            )));
8991        }
8992    };
8993    Ok(Expr::Literal(lit))
8994}
8995
8996/// v7.13.0 — wider helper used by `INSERT … SELECT` (mailrs
8997/// round-5 G4). Covers the most common `Value` variants. Types
8998/// that need lossy textual round-trip (BYTEA, arrays, ts*)
8999/// surface as an Unsupported error so the caller can add a cast
9000/// in the inner SELECT.
9001fn value_to_literal_expr_permissive(v: Value) -> Result<Expr, EngineError> {
9002    let lit = match v {
9003        Value::Null => Literal::Null,
9004        Value::SmallInt(n) => Literal::Integer(i64::from(n)),
9005        Value::Int(n) => Literal::Integer(i64::from(n)),
9006        Value::BigInt(n) => Literal::Integer(n),
9007        Value::Float(x) => Literal::Float(x),
9008        Value::Text(s) | Value::Json(s) => Literal::String(s),
9009        Value::Bool(b) => Literal::Bool(b),
9010        Value::Vector(xs) => Literal::Vector(xs),
9011        // Date / Timestamp / Timestamptz / Numeric round-trip
9012        // through a TEXT literal that `coerce_value` re-parses
9013        // against the target column type.
9014        Value::Date(days) => {
9015            let micros = (i64::from(days)) * 86_400_000_000;
9016            Literal::String(format_timestamp_micros_as_date(micros))
9017        }
9018        Value::Timestamp(us) => Literal::String(format_timestamp_micros(us)),
9019        Value::Numeric { scaled, scale } => {
9020            Literal::String(format_numeric(scaled, scale))
9021        }
9022        other => {
9023            return Err(EngineError::Unsupported(alloc::format!(
9024                "INSERT … SELECT cannot materialise value of type {:?}; \
9025                 add an explicit CAST in the inner SELECT",
9026                other.data_type()
9027            )));
9028        }
9029    };
9030    Ok(Expr::Literal(lit))
9031}
9032
9033fn format_timestamp_micros(us: i64) -> String {
9034    // Same Y/M/D split used by the wire layer; epoch-relative.
9035    let days = us.div_euclid(86_400_000_000);
9036    let intra_day = us.rem_euclid(86_400_000_000);
9037    let date = format_timestamp_micros_as_date(days * 86_400_000_000);
9038    let secs = intra_day / 1_000_000;
9039    let us_rem = intra_day % 1_000_000;
9040    let h = (secs / 3600) % 24;
9041    let m = (secs / 60) % 60;
9042    let s = secs % 60;
9043    if us_rem == 0 {
9044        alloc::format!("{date} {h:02}:{m:02}:{s:02}")
9045    } else {
9046        alloc::format!("{date} {h:02}:{m:02}:{s:02}.{us_rem:06}")
9047    }
9048}
9049
9050fn format_timestamp_micros_as_date(us: i64) -> String {
9051    // Days since 1970-01-01 → calendar Y-M-D via the proleptic
9052    // Gregorian conversion used by spg-engine's date helpers.
9053    let days = us.div_euclid(86_400_000_000);
9054    // 1970-01-01 = JDN 2440588.
9055    let jdn = days + 2_440_588;
9056    let (y, mo, d) = jdn_to_ymd(jdn);
9057    alloc::format!("{y:04}-{mo:02}-{d:02}")
9058}
9059
/// Convert a Julian Day Number into a `(year, month, day)` triple
/// using the Fliegel & Van Flandern (1968) integer algorithm.
///
/// The arithmetic relies on truncating integer division and is
/// only intended for positive JDNs.
fn jdn_to_ymd(jdn: i64) -> (i64, u32, u32) {
    let shifted = jdn + 68569;
    let century_idx = (4 * shifted) / 146_097;
    let rem = shifted - (146_097 * century_idx + 3) / 4;
    let year_idx = (4000 * (rem + 1)) / 1_461_001;
    let doy = rem - (1461 * year_idx) / 4 + 31;
    let month_idx = (80 * doy) / 2447;
    let day = (doy - (2447 * month_idx) / 80) as u32;
    // `carry` is 1 when the month index runs past December and
    // rolls the result over into the following year.
    let carry = month_idx / 11;
    let month = (month_idx + 2 - 12 * carry) as u32;
    let year = 100 * (century_idx - 49) + year_idx + carry;
    (year, month, day)
}
9074
/// Render a fixed-point NUMERIC (`scaled` × 10^-`scale`) as decimal
/// text, e.g. `(12345, 2)` → `"123.45"` and `(-5, 2)` → `"-0.05"`.
///
/// With `scale == 0` the integer is printed as-is. Otherwise the
/// absolute value's digits are left-padded with zeros to at least
/// `scale + 1` characters and the decimal point is placed `scale`
/// digits from the right. Working on the digit string avoids
/// computing `10^scale`, which does not fit in `u128` once `scale`
/// exceeds 38 (and `scale` is a `u8`, so values up to 255 can
/// arrive here).
fn format_numeric(scaled: i128, scale: u8) -> String {
    if scale == 0 {
        return alloc::format!("{scaled}");
    }
    let frac_len = usize::from(scale);
    // `unsigned_abs` is total, including for `i128::MIN`.
    let digits = alloc::format!(
        "{:0width$}",
        scaled.unsigned_abs(),
        width = frac_len + 1
    );
    // `digits` is pure ASCII, so a byte split is a character split.
    let (whole, frac) = digits.split_at(digits.len() - frac_len);
    let sign = if scaled < 0 { "-" } else { "" };
    alloc::format!("{sign}{whole}.{frac}")
}
9089
9090/// v6.1.1 — walk the prepared `Statement` AST and replace every
9091/// `Expr::Placeholder(n)` with `Expr::Literal(value_to_literal(
9092/// params[n-1]))`. The dispatch downstream sees a `Statement`
9093/// indistinguishable from a simple-query parse, so the exec path
9094/// stays unchanged.
9095///
9096/// Errors fall into one shape: a `$N` references past the bound
9097/// `params.len()`. Out-of-range happens when the Bind didn't
9098/// supply enough values; pgwire surfaces this as a protocol error
9099/// to the client.
9100/// v7.15.0 — rewrite every (potentially-qualified) column
9101/// identifier matching `old` to `new` in a stored SQL source
9102/// string. Used by `ALTER TABLE … RENAME COLUMN` to patch
9103/// CHECK predicate sources, partial-index predicate sources,
9104/// and runtime DEFAULT expression sources before they get
9105/// re-parsed on the next INSERT/UPDATE.
9106///
9107/// Round-trips through the parser, so the rewritten output is
9108/// the canonical Display form (matches what the engine stores
9109/// for fresh predicates). If the source doesn't parse, surfaces
9110/// the parse error — the invariant that stored predicates are
9111/// in canonical Display form means a parse failure here is a
9112/// real bug, not a user mistake to swallow.
9113fn rewrite_column_in_source(
9114    src: &str,
9115    old: &str,
9116    new: &str,
9117) -> Result<alloc::string::String, EngineError> {
9118    let mut expr = spg_sql::parser::parse_expression(src).map_err(|e| {
9119        EngineError::Unsupported(alloc::format!(
9120            "ALTER TABLE RENAME COLUMN: stored predicate source {src:?} \
9121             failed to parse for rewrite ({e})"
9122        ))
9123    })?;
9124    rewrite_column_in_expr(&mut expr, old, new);
9125    Ok(alloc::format!("{expr}"))
9126}
9127
9128/// v7.15.0 — Expr walker that swaps `Expr::Column { name: old, .. }`
9129/// for `Expr::Column { name: new, .. }`. Qualifier is preserved
9130/// (e.g. `t.old` → `t.new`); a foreign-table qualifier still
9131/// gets rewritten because the AST has no way to tell us this
9132/// predicate is on table T versus table T2 — predicate sources
9133/// in SPG are always scoped to the owning table, so any
9134/// qualifier present is either redundant or wrong.
9135fn rewrite_column_in_expr(e: &mut Expr, old: &str, new: &str) {
9136    match e {
9137        Expr::Column(c) => {
9138            if c.name.eq_ignore_ascii_case(old) {
9139                c.name = new.to_string();
9140            }
9141        }
9142        Expr::Binary { lhs, rhs, .. } => {
9143            rewrite_column_in_expr(lhs, old, new);
9144            rewrite_column_in_expr(rhs, old, new);
9145        }
9146        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
9147            rewrite_column_in_expr(expr, old, new);
9148        }
9149        Expr::FunctionCall { args, .. } => {
9150            for a in args {
9151                rewrite_column_in_expr(a, old, new);
9152            }
9153        }
9154        Expr::Like { expr, pattern, .. } => {
9155            rewrite_column_in_expr(expr, old, new);
9156            rewrite_column_in_expr(pattern, old, new);
9157        }
9158        Expr::Extract { source, .. } => rewrite_column_in_expr(source, old, new),
9159        Expr::WindowFunction {
9160            args,
9161            partition_by,
9162            order_by,
9163            ..
9164        } => {
9165            for a in args {
9166                rewrite_column_in_expr(a, old, new);
9167            }
9168            for p in partition_by {
9169                rewrite_column_in_expr(p, old, new);
9170            }
9171            for (o, _) in order_by {
9172                rewrite_column_in_expr(o, old, new);
9173            }
9174        }
9175        Expr::Array(items) => {
9176            for elem in items {
9177                rewrite_column_in_expr(elem, old, new);
9178            }
9179        }
9180        Expr::ArraySubscript { target, index } => {
9181            rewrite_column_in_expr(target, old, new);
9182            rewrite_column_in_expr(index, old, new);
9183        }
9184        Expr::AnyAll { expr, array, .. } => {
9185            rewrite_column_in_expr(expr, old, new);
9186            rewrite_column_in_expr(array, old, new);
9187        }
9188        Expr::Case {
9189            operand,
9190            branches,
9191            else_branch,
9192        } => {
9193            if let Some(o) = operand {
9194                rewrite_column_in_expr(o, old, new);
9195            }
9196            for (w, t) in branches {
9197                rewrite_column_in_expr(w, old, new);
9198                rewrite_column_in_expr(t, old, new);
9199            }
9200            if let Some(e) = else_branch {
9201                rewrite_column_in_expr(e, old, new);
9202            }
9203        }
9204        // Stored predicate sources never contain subqueries —
9205        // CHECK / partial-index / runtime_default are all scalar.
9206        // If a future feature changes that, recurse here.
9207        Expr::ScalarSubquery(_) | Expr::Exists { .. } | Expr::InSubquery { .. } => {}
9208        Expr::Literal(_) | Expr::Placeholder(_) => {}
9209    }
9210}
9211
9212/// v7.16.0 — walks a parsed statement and replaces every
9213/// `Expr::Placeholder(N)` with the corresponding `params[N-1]`
9214/// re-encoded as an `Expr::Literal`. Used internally by
9215/// `Engine::execute_prepared` AND surfaced for the spg-embedded
9216/// WAL path (which needs the bind-final AST so replay sees a
9217/// simple-query-shaped statement, not a `$1`-shaped one). Errors
9218/// when a placeholder references an index past the params slice.
9219pub fn substitute_placeholders(stmt: &mut Statement, params: &[Value]) -> Result<(), EngineError> {
9220    match stmt {
9221        Statement::Select(s) => substitute_select(s, params)?,
9222        Statement::Insert(ins) => {
9223            for row in &mut ins.rows {
9224                for e in row {
9225                    substitute_expr(e, params)?;
9226                }
9227            }
9228        }
9229        Statement::Update(u) => {
9230            for (_, e) in &mut u.assignments {
9231                substitute_expr(e, params)?;
9232            }
9233            if let Some(w) = &mut u.where_ {
9234                substitute_expr(w, params)?;
9235            }
9236        }
9237        Statement::Delete(d) => {
9238            if let Some(w) = &mut d.where_ {
9239                substitute_expr(w, params)?;
9240            }
9241        }
9242        Statement::Explain(e) => substitute_select(&mut e.inner, params)?,
9243        // Other statements (CREATE / BEGIN / SHOW / …) have no
9244        // expression slots; no walk needed.
9245        _ => {}
9246    }
9247    Ok(())
9248}
9249
9250fn substitute_select(s: &mut SelectStatement, params: &[Value]) -> Result<(), EngineError> {
9251    for item in &mut s.items {
9252        if let SelectItem::Expr { expr, .. } = item {
9253            substitute_expr(expr, params)?;
9254        }
9255    }
9256    if let Some(w) = &mut s.where_ {
9257        substitute_expr(w, params)?;
9258    }
9259    if let Some(gs) = &mut s.group_by {
9260        for g in gs {
9261            substitute_expr(g, params)?;
9262        }
9263    }
9264    if let Some(h) = &mut s.having {
9265        substitute_expr(h, params)?;
9266    }
9267    for o in &mut s.order_by {
9268        substitute_expr(&mut o.expr, params)?;
9269    }
9270    for (_, peer) in &mut s.unions {
9271        substitute_select(peer, params)?;
9272    }
9273    // v7.9.24 — LIMIT $N / OFFSET $N placeholder resolution.
9274    // mailrs H2. After this pass each LIMIT/OFFSET that was a
9275    // Placeholder is rewritten to Literal so the existing
9276    // `LimitExpr::as_literal` path consumes a concrete u32.
9277    if let Some(le) = s.limit {
9278        s.limit = Some(resolve_limit_placeholder(le, params)?);
9279    }
9280    if let Some(le) = s.offset {
9281        s.offset = Some(resolve_limit_placeholder(le, params)?);
9282    }
9283    Ok(())
9284}
9285
9286fn resolve_limit_placeholder(
9287    le: spg_sql::ast::LimitExpr,
9288    params: &[Value],
9289) -> Result<spg_sql::ast::LimitExpr, EngineError> {
9290    use spg_sql::ast::LimitExpr;
9291    match le {
9292        LimitExpr::Literal(_) => Ok(le),
9293        LimitExpr::Placeholder(n) => {
9294            let idx = usize::from(n).saturating_sub(1);
9295            let v = params.get(idx).ok_or_else(|| {
9296                EngineError::Eval(EvalError::PlaceholderOutOfRange {
9297                    n,
9298                    bound: u16::try_from(params.len()).unwrap_or(u16::MAX),
9299                })
9300            })?;
9301            let int = match v {
9302                Value::SmallInt(x) => Some(i64::from(*x)),
9303                Value::Int(x) => Some(i64::from(*x)),
9304                Value::BigInt(x) => Some(*x),
9305                _ => None,
9306            }
9307            .ok_or_else(|| {
9308                EngineError::Unsupported(alloc::format!(
9309                    "LIMIT/OFFSET ${n} bound to non-integer {v:?}"
9310                ))
9311            })?;
9312            if int < 0 {
9313                return Err(EngineError::Unsupported(alloc::format!(
9314                    "LIMIT/OFFSET ${n} bound to negative value {int}"
9315                )));
9316            }
9317            let bounded = u32::try_from(int).map_err(|_| {
9318                EngineError::Unsupported(alloc::format!(
9319                    "LIMIT/OFFSET ${n} value {int} exceeds u32 range"
9320                ))
9321            })?;
9322            Ok(LimitExpr::Literal(bounded))
9323        }
9324    }
9325}
9326
9327fn substitute_expr(e: &mut Expr, params: &[Value]) -> Result<(), EngineError> {
9328    if let Expr::Placeholder(n) = e {
9329        let idx = usize::from(*n).saturating_sub(1);
9330        let v = params.get(idx).ok_or_else(|| {
9331            EngineError::Eval(EvalError::PlaceholderOutOfRange {
9332                n: *n,
9333                bound: u16::try_from(params.len()).unwrap_or(u16::MAX),
9334            })
9335        })?;
9336        *e = Expr::Literal(value_to_literal(v.clone()));
9337        return Ok(());
9338    }
9339    match e {
9340        Expr::Binary { lhs, rhs, .. } => {
9341            substitute_expr(lhs, params)?;
9342            substitute_expr(rhs, params)?;
9343        }
9344        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
9345            substitute_expr(expr, params)?;
9346        }
9347        Expr::FunctionCall { args, .. } => {
9348            for a in args {
9349                substitute_expr(a, params)?;
9350            }
9351        }
9352        Expr::Like { expr, pattern, .. } => {
9353            substitute_expr(expr, params)?;
9354            substitute_expr(pattern, params)?;
9355        }
9356        Expr::Extract { source, .. } => substitute_expr(source, params)?,
9357        Expr::ScalarSubquery(s) => substitute_select(s, params)?,
9358        Expr::Exists { subquery, .. } => substitute_select(subquery, params)?,
9359        Expr::InSubquery { expr, subquery, .. } => {
9360            substitute_expr(expr, params)?;
9361            substitute_select(subquery, params)?;
9362        }
9363        Expr::WindowFunction {
9364            args,
9365            partition_by,
9366            order_by,
9367            ..
9368        } => {
9369            for a in args {
9370                substitute_expr(a, params)?;
9371            }
9372            for p in partition_by {
9373                substitute_expr(p, params)?;
9374            }
9375            for (e, _) in order_by {
9376                substitute_expr(e, params)?;
9377            }
9378        }
9379        Expr::Literal(_) | Expr::Column(_) => {}
9380        // Already handled above.
9381        Expr::Placeholder(_) => unreachable!("Placeholder handled at top of fn"),
9382        Expr::Array(items) => {
9383            for elem in items {
9384                substitute_expr(elem, params)?;
9385            }
9386        }
9387        Expr::ArraySubscript { target, index } => {
9388            substitute_expr(target, params)?;
9389            substitute_expr(index, params)?;
9390        }
9391        Expr::AnyAll { expr, array, .. } => {
9392            substitute_expr(expr, params)?;
9393            substitute_expr(array, params)?;
9394        }
9395        Expr::Case {
9396            operand,
9397            branches,
9398            else_branch,
9399        } => {
9400            if let Some(o) = operand {
9401                substitute_expr(o, params)?;
9402            }
9403            for (w, t) in branches {
9404                substitute_expr(w, params)?;
9405                substitute_expr(t, params)?;
9406            }
9407            if let Some(e) = else_branch {
9408                substitute_expr(e, params)?;
9409            }
9410        }
9411    }
9412    Ok(())
9413}
9414
9415/// v6.1.1 — convert a runtime `Value` into the closest matching
9416/// `Literal` for the substitute walker. Lossless for the simple
9417/// scalars (Int / Float / Text / Bool); Numeric / Date / Timestamp
9418/// / Json / Interval render as their canonical text form so the
9419/// downstream coerce_value can re-parse against the target column
9420/// type. SQ8 / HalfVector cells are NOT expected as bind params;
9421/// pgwire's Bind decodes vector params to the f32 representation
9422/// before they reach this helper.
9423/// v6.2.0 — total ordering on `Value`s used by ANALYZE to sort a
9424/// column's non-NULL sample before histogram building. Cross-type
9425/// pairs (Int vs Float, Date vs Timestamp, …) compare via the
9426/// same widening the eval-side `compare` operator uses; everything
9427/// else (the genuinely-incompatible pairs) falls back to ordering
9428/// by canonical string form so the sort is still total + stable.
9429/// Vector / SQ8 / Half / Json / Numeric / Interval values reach
9430/// here only via the string-fallback path because vector columns
9431/// are filtered out upstream.
9432fn sort_values_for_histogram(a: &Value, b: &Value) -> core::cmp::Ordering {
9433    use core::cmp::Ordering;
9434    match (a, b) {
9435        (Value::SmallInt(a), Value::SmallInt(b)) => a.cmp(b),
9436        (Value::Int(a), Value::Int(b)) => a.cmp(b),
9437        (Value::BigInt(a), Value::BigInt(b)) => a.cmp(b),
9438        (Value::SmallInt(a), Value::Int(b)) => i32::from(*a).cmp(b),
9439        (Value::Int(a), Value::SmallInt(b)) => a.cmp(&i32::from(*b)),
9440        (Value::Int(a), Value::BigInt(b)) => i64::from(*a).cmp(b),
9441        (Value::BigInt(a), Value::Int(b)) => a.cmp(&i64::from(*b)),
9442        (Value::SmallInt(a), Value::BigInt(b)) => i64::from(*a).cmp(b),
9443        (Value::BigInt(a), Value::SmallInt(b)) => a.cmp(&i64::from(*b)),
9444        (Value::Float(a), Value::Float(b)) => a.partial_cmp(b).unwrap_or(Ordering::Equal),
9445        (Value::Text(a), Value::Text(b)) | (Value::Json(a), Value::Json(b)) => a.cmp(b),
9446        (Value::Bool(a), Value::Bool(b)) => a.cmp(b),
9447        (Value::Date(a), Value::Date(b)) => a.cmp(b),
9448        (Value::Timestamp(a), Value::Timestamp(b)) => a.cmp(b),
9449        // Mixed numeric/float — widen to f64 and compare.
9450        (Value::SmallInt(n), Value::Float(x)) => {
9451            (f64::from(*n)).partial_cmp(x).unwrap_or(Ordering::Equal)
9452        }
9453        (Value::Float(x), Value::SmallInt(n)) => {
9454            x.partial_cmp(&f64::from(*n)).unwrap_or(Ordering::Equal)
9455        }
9456        (Value::Int(n), Value::Float(x)) => {
9457            (f64::from(*n)).partial_cmp(x).unwrap_or(Ordering::Equal)
9458        }
9459        (Value::Float(x), Value::Int(n)) => {
9460            x.partial_cmp(&f64::from(*n)).unwrap_or(Ordering::Equal)
9461        }
9462        (Value::BigInt(n), Value::Float(x)) => {
9463            #[allow(clippy::cast_precision_loss)]
9464            let nf = *n as f64;
9465            nf.partial_cmp(x).unwrap_or(Ordering::Equal)
9466        }
9467        (Value::Float(x), Value::BigInt(n)) => {
9468            #[allow(clippy::cast_precision_loss)]
9469            let nf = *n as f64;
9470            x.partial_cmp(&nf).unwrap_or(Ordering::Equal)
9471        }
9472        // Cross-type fallback: lexicographic on canonical form.
9473        // Total + stable so the sort is well-defined.
9474        _ => canonical_value_repr(a).cmp(&canonical_value_repr(b)),
9475    }
9476}
9477
/// v6.2.0 — format histogram bounds as a `[v0, v1, ...]` string
/// for the `spg_statistic.histogram_bounds` column.
///
/// A bound is emitted bare unless it is empty or contains one of
/// `,`, `[`, `]`, `"`. In that case it is wrapped in double quotes
/// and every `"` and `\` inside it is prefixed with a backslash.
/// Entries are separated by `", "`.
fn render_histogram_bounds(bounds: &[alloc::string::String]) -> alloc::string::String {
    let mut rendered = alloc::string::String::with_capacity(bounds.len() * 8 + 2);
    rendered.push('[');
    let mut first = true;
    for bound in bounds {
        if !first {
            rendered.push_str(", ");
        }
        first = false;

        let bare = !bound.is_empty() && !bound.contains([',', '[', ']', '"']);
        if bare {
            rendered.push_str(bound);
            continue;
        }
        rendered.push('"');
        for ch in bound.chars() {
            if matches!(ch, '"' | '\\') {
                rendered.push('\\');
            }
            rendered.push(ch);
        }
        rendered.push('"');
    }
    rendered.push(']');
    rendered
}
9508
9509/// v6.2.0 — canonical textual form of a `Value` for histogram
9510/// bound storage. Strings used by ANALYZE for sort + bound output.
9511/// INT / BIGINT → decimal; FLOAT → shortest-round-trip via
9512/// `{:?}`; TEXT pass-through; BOOL → `t` / `f`; DATE / TIMESTAMP →
9513/// the same form `format_date` / `format_timestamp` produce for
9514/// SQL Display. Vector / SQ8 / Half / Json / Numeric / Interval
9515/// reach this only via a non-Vector column (vector columns are
9516/// skipped upstream); they fall back to a Debug-derived form so
9517/// stats still serialise without crashing.
9518pub(crate) fn canonical_value_repr(v: &Value) -> alloc::string::String {
9519    match v {
9520        Value::Null => "NULL".to_string(),
9521        Value::SmallInt(n) => alloc::format!("{n}"),
9522        Value::Int(n) => alloc::format!("{n}"),
9523        Value::BigInt(n) => alloc::format!("{n}"),
9524        Value::Float(x) => alloc::format!("{x:?}"),
9525        Value::Text(s) | Value::Json(s) => s.clone(),
9526        Value::Bool(b) => if *b { "t" } else { "f" }.to_string(),
9527        Value::Date(d) => eval::format_date(*d),
9528        Value::Timestamp(t) => eval::format_timestamp(*t),
9529        Value::Interval { months, micros } => eval::format_interval(*months, *micros),
9530        Value::Numeric { scaled, scale } => eval::format_numeric(*scaled, *scale),
9531        Value::Vector(_) | Value::Sq8Vector(_) | Value::HalfVector(_) => {
9532            // Unreachable in practice (vector columns are filtered
9533            // out before this). Defensive fallback so a future
9534            // vector-stats path doesn't crash.
9535            alloc::format!("{v:?}")
9536        }
9537        // v7.5.0 — Value is #[non_exhaustive] for downstream
9538        // forward-compat. Future variants fall through to Debug
9539        // form here (same shape as the vector fallback above).
9540        _ => alloc::format!("{v:?}"),
9541    }
9542}
9543
/// v6.2.0 — reports whether `_name` is an engine-managed catalog
/// table that a bare `ANALYZE` (no target) should skip.
///
/// Currently always `false`: publications, subscriptions, users
/// and statistics live as engine fields rather than catalog
/// tables, so there is nothing to exclude and every user table is
/// analysed. The function is a reserved hook for when internal
/// tables are introduced.
const fn is_internal_table_name(_name: &str) -> bool {
    false
}
9553
9554fn value_to_literal(v: Value) -> Literal {
9555    match v {
9556        Value::Null => Literal::Null,
9557        Value::SmallInt(n) => Literal::Integer(i64::from(n)),
9558        Value::Int(n) => Literal::Integer(i64::from(n)),
9559        Value::BigInt(n) => Literal::Integer(n),
9560        Value::Float(x) => Literal::Float(x),
9561        Value::Text(s) | Value::Json(s) => Literal::String(s),
9562        Value::Bool(b) => Literal::Bool(b),
9563        Value::Vector(v) => Literal::Vector(v),
9564        Value::Numeric { scaled, scale } => Literal::String(eval::format_numeric(scaled, scale)),
9565        Value::Date(d) => Literal::String(eval::format_date(d)),
9566        Value::Timestamp(t) => Literal::String(eval::format_timestamp(t)),
9567        // v7.16.0 — BYTEA round-trip for the spg-sqlx Bind path.
9568        // PG-canonical text rep is `\x` + lowercase hex; the
9569        // engine's coerce_value already accepts that on the
9570        // text → bytea direction.
9571        Value::Bytes(b) => Literal::String(eval::format_bytea_hex(&b)),
9572        // v7.16.0 — array round-trip for the spg-sqlx Bind
9573        // path. Render as PG external form `{a,b,c}`; the
9574        // engine's text → array coerce (just below in
9575        // coerce_value) accepts it on the matching column type.
9576        Value::TextArray(items) => Literal::String(eval::format_text_array(&items)),
9577        Value::IntArray(items) => Literal::String(eval::format_int_array(&items)),
9578        Value::BigIntArray(items) => Literal::String(eval::format_bigint_array(&items)),
9579        Value::Interval { months, micros } => Literal::Interval {
9580            months,
9581            micros,
9582            text: eval::format_interval(months, micros),
9583        },
9584        // SQ8 / halfvec cells dequantise to f32 before reaching the
9585        // substitute walker; pgwire's Bind path handles that.
9586        Value::Sq8Vector(q) => Literal::Vector(spg_storage::quantize::dequantize(&q)),
9587        Value::HalfVector(h) => Literal::Vector(h.to_f32_vec()),
9588        // v7.5.0 — Value is #[non_exhaustive]; future variants
9589        // render as Debug-form String literal until explicit
9590        // mapping is added.
9591        v => Literal::String(alloc::format!("{v:?}")),
9592    }
9593}
9594
9595fn rewrite_clock_calls(stmt: &mut Statement, now_micros: Option<i64>) {
9596    let Some(now) = now_micros else {
9597        return;
9598    };
9599    match stmt {
9600        Statement::Select(s) => rewrite_select_clock(s, now),
9601        Statement::Insert(ins) => {
9602            for row in &mut ins.rows {
9603                for e in row {
9604                    rewrite_expr_clock(e, now);
9605                }
9606            }
9607        }
9608        _ => {}
9609    }
9610}
9611
9612fn rewrite_select_clock(s: &mut SelectStatement, now: i64) {
9613    for item in &mut s.items {
9614        if let SelectItem::Expr { expr, .. } = item {
9615            rewrite_expr_clock(expr, now);
9616        }
9617    }
9618    if let Some(w) = &mut s.where_ {
9619        rewrite_expr_clock(w, now);
9620    }
9621    if let Some(gs) = &mut s.group_by {
9622        for g in gs {
9623            rewrite_expr_clock(g, now);
9624        }
9625    }
9626    if let Some(h) = &mut s.having {
9627        rewrite_expr_clock(h, now);
9628    }
9629    for o in &mut s.order_by {
9630        rewrite_expr_clock(&mut o.expr, now);
9631    }
9632    for (_, peer) in &mut s.unions {
9633        rewrite_select_clock(peer, now);
9634    }
9635}
9636
9637/// v3.0.3 hot path: every recursion lands in exactly one `match` arm.
9638/// Literal / Column-with-qualifier (the dominant cases on a typical
9639/// AST) take a single pattern dispatch and exit. The clock-rewrite
9640/// targets (zero-arg `NOW` / `CURRENT_TIMESTAMP` / `CURRENT_DATE`
9641/// functions, and bare `CURRENT_TIMESTAMP` / `CURRENT_DATE` column
9642/// refs) sit on their own arms with match guards so the fall-through
9643/// to the recursive arms is unambiguous.
9644fn rewrite_expr_clock(e: &mut Expr, now: i64) {
9645    // Fast-path test on the no-recursion shapes first. We can't fold
9646    // them into the big match below because they need to *replace* `e`
9647    // outright; the recursive arms below match on its sub-fields.
9648    if let Some(replacement) = clock_replacement_for(e, now) {
9649        *e = replacement;
9650        return;
9651    }
9652    match e {
9653        Expr::Binary { lhs, rhs, .. } => {
9654            rewrite_expr_clock(lhs, now);
9655            rewrite_expr_clock(rhs, now);
9656        }
9657        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
9658            rewrite_expr_clock(expr, now);
9659        }
9660        Expr::FunctionCall { args, .. } => {
9661            for a in args {
9662                rewrite_expr_clock(a, now);
9663            }
9664        }
9665        Expr::Like { expr, pattern, .. } => {
9666            rewrite_expr_clock(expr, now);
9667            rewrite_expr_clock(pattern, now);
9668        }
9669        Expr::Extract { source, .. } => rewrite_expr_clock(source, now),
9670        // v4.10 subquery nodes — recurse into the inner SELECT's
9671        // expression slots so e.g. SELECT NOW() in a scalar
9672        // subquery picks up the same instant as the outer query.
9673        Expr::ScalarSubquery(s) => rewrite_select_clock(s, now),
9674        Expr::Exists { subquery, .. } => rewrite_select_clock(subquery, now),
9675        Expr::InSubquery { expr, subquery, .. } => {
9676            rewrite_expr_clock(expr, now);
9677            rewrite_select_clock(subquery, now);
9678        }
9679        // v4.12 window functions — args + PARTITION BY + ORDER BY
9680        // may all reference clock literals.
9681        Expr::WindowFunction {
9682            args,
9683            partition_by,
9684            order_by,
9685            ..
9686        } => {
9687            for a in args {
9688                rewrite_expr_clock(a, now);
9689            }
9690            for p in partition_by {
9691                rewrite_expr_clock(p, now);
9692            }
9693            for (e, _) in order_by {
9694                rewrite_expr_clock(e, now);
9695            }
9696        }
9697        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => {}
9698        Expr::Array(items) => {
9699            for elem in items {
9700                rewrite_expr_clock(elem, now);
9701            }
9702        }
9703        Expr::ArraySubscript { target, index } => {
9704            rewrite_expr_clock(target, now);
9705            rewrite_expr_clock(index, now);
9706        }
9707        Expr::AnyAll { expr, array, .. } => {
9708            rewrite_expr_clock(expr, now);
9709            rewrite_expr_clock(array, now);
9710        }
9711        Expr::Case {
9712            operand,
9713            branches,
9714            else_branch,
9715        } => {
9716            if let Some(o) = operand {
9717                rewrite_expr_clock(o, now);
9718            }
9719            for (w, t) in branches {
9720                rewrite_expr_clock(w, now);
9721                rewrite_expr_clock(t, now);
9722            }
9723            if let Some(e) = else_branch {
9724                rewrite_expr_clock(e, now);
9725            }
9726        }
9727    }
9728}
9729
9730/// Returns `Some(Expr)` when `e` is one of the clock-call shapes that
9731/// must be rewritten; otherwise `None` so the caller falls through to
9732/// the recursive walk. Identifies both function-call forms (`NOW()` /
9733/// `CURRENT_TIMESTAMP()` / `CURRENT_DATE()`) and bare-identifier forms
9734/// (`CURRENT_TIMESTAMP` / `CURRENT_DATE` as unqualified column refs,
9735/// which is how PG accepts them without parens).
9736fn clock_replacement_for(e: &Expr, now: i64) -> Option<Expr> {
9737    let (kind, name) = match e {
9738        Expr::FunctionCall { name, args } if args.is_empty() => (ClockSite::Fn, name.as_str()),
9739        Expr::Column(c) if c.qualifier.is_none() => (ClockSite::BareIdent, c.name.as_str()),
9740        _ => return None,
9741    };
9742    // ASCII case-insensitive name match. Limited to the three keywords
9743    // that actually need rewriting.
9744    let matched = match name.len() {
9745        3 if kind == ClockSite::Fn && name.eq_ignore_ascii_case("now") => Some(true),
9746        12 if name.eq_ignore_ascii_case("current_date") => Some(false),
9747        17 if name.eq_ignore_ascii_case("current_timestamp") => Some(true),
9748        _ => None,
9749    };
9750    let is_timestamp = matched?;
9751    let payload = if is_timestamp {
9752        now
9753    } else {
9754        now.div_euclid(86_400_000_000)
9755    };
9756    let target = if is_timestamp {
9757        spg_sql::ast::CastTarget::Timestamp
9758    } else {
9759        spg_sql::ast::CastTarget::Date
9760    };
9761    Some(Expr::Cast {
9762        expr: alloc::boxed::Box::new(Expr::Literal(spg_sql::ast::Literal::Integer(payload))),
9763        target,
9764    })
9765}
9766
/// Syntactic shape in which a clock keyword candidate was found by
/// `clock_replacement_for`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ClockSite {
    /// A function call with an empty argument list, e.g. `NOW()`.
    Fn,
    /// An unqualified column reference, e.g. `CURRENT_DATE`. `NOW` is
    /// not accepted in this shape.
    BareIdent,
}
9772
9773/// `ORDER BY <integer>` references the N-th SELECT item (1-based).
9774/// Swap the integer literal for the matching item's expression so the
9775/// executor doesn't need a special-case branch. Recurses into UNION
9776/// peers because each peer keeps its own SELECT list.
9777/// v6.4.1 — expand `GROUP BY ALL` to every non-aggregate SELECT-list
9778/// item. Mirrors DuckDB / PG 19 semantics. Wildcards (`SELECT * …`)
9779/// are NOT expanded by GROUP BY ALL (PG 19 leaves the wildcard intact
9780/// and groups by whatever explicit non-aggregates remain — none in
9781/// the wildcard-only case, which still works for non-aggregate
9782/// queries).
9783fn expand_group_by_all(s: &mut SelectStatement) {
9784    if !s.group_by_all {
9785        for (_, peer) in &mut s.unions {
9786            expand_group_by_all(peer);
9787        }
9788        return;
9789    }
9790    let mut groups: Vec<Expr> = Vec::new();
9791    for item in &s.items {
9792        if let SelectItem::Expr { expr, .. } = item
9793            && !aggregate::contains_aggregate(expr)
9794        {
9795            groups.push(expr.clone());
9796        }
9797    }
9798    s.group_by = Some(groups);
9799    s.group_by_all = false;
9800    for (_, peer) in &mut s.unions {
9801        expand_group_by_all(peer);
9802    }
9803}
9804
9805fn resolve_order_by_position(s: &mut SelectStatement) {
9806    // v6.4.0 — iterate every ORDER BY key. Position references
9807    // (`ORDER BY 2`) bind to the 1-based projection index;
9808    // identifier references that match a SELECT-list alias bind to
9809    // the projected expression (Step 4 of L3a).
9810    for order in &mut s.order_by {
9811        match &order.expr {
9812            Expr::Literal(Literal::Integer(n)) if *n >= 1 => {
9813                if let Ok(idx_one_based) = usize::try_from(*n) {
9814                    let idx = idx_one_based - 1;
9815                    if idx < s.items.len()
9816                        && let SelectItem::Expr { expr, .. } = &s.items[idx]
9817                    {
9818                        order.expr = expr.clone();
9819                    }
9820                }
9821            }
9822            Expr::Column(c) if c.qualifier.is_none() => {
9823                // Alias-in-ORDER-BY lookup.
9824                for item in &s.items {
9825                    if let SelectItem::Expr {
9826                        expr,
9827                        alias: Some(a),
9828                    } = item
9829                        && a == &c.name
9830                    {
9831                        order.expr = expr.clone();
9832                        break;
9833                    }
9834                }
9835            }
9836            _ => {}
9837        }
9838    }
9839    for (_, peer) in &mut s.unions {
9840        resolve_order_by_position(peer);
9841    }
9842}
9843
9844/// Sort `tagged` by `f64` key, reversing the comparator under DESC.
9845/// Used by the UNION ORDER BY path; per-block paths inline the same
9846/// comparator because they already hold `&OrderBy` directly.
9847/// v3.1.1: partial-sort helper. When `keep` (= offset + limit) is
9848/// strictly less than `tagged.len()`, run `select_nth_unstable_by` to
9849/// partition the prefix in O(n), then sort just that prefix in O(k
9850/// log k). Total O(n + k log k), vs O(n log n) for a full sort. The
9851/// caller decides what `keep` is; passing `None` (no LIMIT) keeps the
9852/// full-sort behaviour.
9853///
9854/// `tagged` holds `(Option<f64>, Row)` (the SELECT path) — `None` keys
9855/// sort last in ascending order, mirroring NULL-sorts-last in SQL.
9856fn partial_sort_tagged(tagged: &mut Vec<(Vec<f64>, Row)>, keep: Option<usize>, descs: &[bool]) {
9857    let cmp = |a: &(Vec<f64>, Row), b: &(Vec<f64>, Row)| cmp_multi_key(&a.0, &b.0, descs);
9858    match keep {
9859        Some(k) if k < tagged.len() && k > 0 => {
9860            let pivot = k - 1;
9861            tagged.select_nth_unstable_by(pivot, cmp);
9862            tagged[..k].sort_by(cmp);
9863            tagged.truncate(k);
9864        }
9865        _ => {
9866            tagged.sort_by(cmp);
9867        }
9868    }
9869}
9870
9871fn sort_by_keys(tagged: &mut [(Vec<f64>, Row)], descs: &[bool]) {
9872    tagged.sort_by(|a, b| cmp_multi_key(&a.0, &b.0, descs));
9873}
9874
/// v6.4.0 — multi-key ORDER BY comparator.
///
/// Compares `a` and `b` key by key and returns the first non-equal
/// result. Key `i` is compared in reverse when `descs[i]` is `true`; a
/// missing flag means ascending. If the slices differ in length only the
/// common prefix is compared. NULL is encoded by the caller as
/// `f64::INFINITY`, so it sorts last in ASC and first in DESC (matches
/// the PG default).
///
/// NaN keys are ordered after every non-NaN key (and equal to each
/// other) before the DESC flag is applied, which is also how PG orders
/// NaN. Treating an incomparable pair as `Equal` would not be a total
/// order (`NaN == 1`, `NaN == 2`, yet `1 < 2`), and the slice sort
/// routines give unspecified results — and may panic — on such
/// comparators.
fn cmp_multi_key(a: &[f64], b: &[f64], descs: &[bool]) -> core::cmp::Ordering {
    use core::cmp::Ordering;
    for (i, (ka, kb)) in a.iter().zip(b.iter()).enumerate() {
        // `partial_cmp` is `None` only when at least one side is NaN;
        // `false < true` places the NaN side last.
        let ord = ka
            .partial_cmp(kb)
            .unwrap_or_else(|| ka.is_nan().cmp(&kb.is_nan()));
        let ord = if descs.get(i).copied().unwrap_or(false) {
            ord.reverse()
        } else {
            ord
        };
        if ord != Ordering::Equal {
            return ord;
        }
    }
    Ordering::Equal
}
9893
9894/// v6.4.0 — eval every ORDER BY expression for a row and pack the
9895/// resulting keys into a `Vec<f64>`. NULL → `f64::INFINITY`.
9896fn build_order_keys(
9897    order_by: &[OrderBy],
9898    row: &Row,
9899    ctx: &EvalContext,
9900) -> Result<Vec<f64>, EngineError> {
9901    let mut keys = Vec::with_capacity(order_by.len());
9902    for o in order_by {
9903        let v = eval::eval_expr(&o.expr, row, ctx)?;
9904        keys.push(value_to_order_key(&v)?);
9905    }
9906    Ok(keys)
9907}
9908
9909/// Drop the first `offset` rows then truncate to `limit`. PG / `MySQL`
9910/// agree: OFFSET applies *after* ORDER BY but *before* LIMIT (so
9911/// `LIMIT 10 OFFSET 5` keeps rows 6..=15).
9912fn apply_offset_and_limit(rows: &mut Vec<Row>, offset: Option<u32>, limit: Option<u32>) {
9913    if let Some(off) = offset {
9914        let off = off as usize;
9915        if off >= rows.len() {
9916            rows.clear();
9917        } else {
9918            rows.drain(..off);
9919        }
9920    }
9921    if let Some(n) = limit {
9922        rows.truncate(n as usize);
9923    }
9924}
9925
9926/// v7.6.1 — resolve a parser-level `ForeignKeyConstraint` (column
9927/// names + parent table name) into the storage-layer shape (column
9928/// indices + same parent table). Validates everything the engine
9929/// needs to know about the FK at CREATE TABLE time:
9930///
9931///   - parent table exists (catalog lookup, unless self-referencing)
9932///   - parent columns exist on the parent table
9933///   - parent column list matches the local arity (defaults to the
9934///     parent's primary index column when omitted)
9935///   - parent columns are covered by a `BTree` UNIQUE-class index
9936///     (SPG's stand-in for `PRIMARY KEY`/`UNIQUE`) — required so
9937///     the v7.6.2 INSERT path can do an O(log n) parent lookup
9938///   - local columns exist on the table being created
9939fn resolve_foreign_key(
9940    local_table_name: &str,
9941    local_cols: &[ColumnSchema],
9942    fk: spg_sql::ast::ForeignKeyConstraint,
9943    catalog: &Catalog,
9944) -> Result<spg_storage::ForeignKeyConstraint, EngineError> {
9945    // Resolve local columns.
9946    let mut local_columns = Vec::with_capacity(fk.columns.len());
9947    for name in &fk.columns {
9948        let pos = local_cols
9949            .iter()
9950            .position(|c| c.name == *name)
9951            .ok_or_else(|| {
9952                EngineError::Unsupported(alloc::format!(
9953                    "FOREIGN KEY references unknown local column {name:?}"
9954                ))
9955            })?;
9956        local_columns.push(pos);
9957    }
9958    // Self-referencing FK: parent table is the one we're creating.
9959    // The parent column resolution uses the local column list since
9960    // the catalog doesn't have this table yet.
9961    let is_self_ref = fk.parent_table == local_table_name;
9962    let (parent_cols_for_lookup, parent_table_str): (&[ColumnSchema], &str) = if is_self_ref {
9963        (local_cols, local_table_name)
9964    } else {
9965        let parent_table = catalog.get(&fk.parent_table).ok_or_else(|| {
9966            EngineError::Storage(StorageError::TableNotFound {
9967                name: fk.parent_table.clone(),
9968            })
9969        })?;
9970        (
9971            parent_table.schema().columns.as_slice(),
9972            fk.parent_table.as_str(),
9973        )
9974    };
9975    // Resolve parent column names → positions. If the FK omitted the
9976    // parent column list, fall back to the parent's primary index
9977    // column (single-column only — composite default is rejected
9978    // because there's no unambiguous "PK" in SPG's index list).
9979    let parent_columns: Vec<usize> = if fk.parent_columns.is_empty() {
9980        if fk.columns.len() != 1 {
9981            return Err(EngineError::Unsupported(
9982                "composite FOREIGN KEY without explicit parent column list is not supported \
9983                 — list the parent columns explicitly"
9984                    .into(),
9985            ));
9986        }
9987        // Find a single BTree index on the parent and use its column.
9988        let pos = pick_pk_index_column(catalog, parent_table_str, is_self_ref, local_cols)
9989            .ok_or_else(|| {
9990                EngineError::Unsupported(alloc::format!(
9991                    "parent table {parent_table_str:?} has no PRIMARY-key / UNIQUE BTree index \
9992                     to default the FOREIGN KEY against"
9993                ))
9994            })?;
9995        alloc::vec![pos]
9996    } else {
9997        let mut out = Vec::with_capacity(fk.parent_columns.len());
9998        for name in &fk.parent_columns {
9999            let pos = parent_cols_for_lookup
10000                .iter()
10001                .position(|c| c.name == *name)
10002                .ok_or_else(|| {
10003                    EngineError::Unsupported(alloc::format!(
10004                        "FOREIGN KEY references unknown parent column \
10005                         {name:?} on table {parent_table_str:?}"
10006                    ))
10007                })?;
10008            out.push(pos);
10009        }
10010        out
10011    };
10012    if parent_columns.len() != local_columns.len() {
10013        return Err(EngineError::Unsupported(alloc::format!(
10014            "FOREIGN KEY arity mismatch: {} local columns vs {} parent columns",
10015            local_columns.len(),
10016            parent_columns.len()
10017        )));
10018    }
10019    // For non-self-referencing FKs, verify the parent column set is
10020    // covered by a BTree index. SPG doesn't have a `PRIMARY KEY`
10021    // declaration; the convention is "the parent column for FK
10022    // purposes must have a BTree index" — which the user creates via
10023    // `CREATE INDEX ... USING btree (col)` (the default). We accept
10024    // any single-column BTree index that covers a parent column;
10025    // composite parent column lists require an index whose `column_position`
10026    // matches the first parent column (multi-column BTree indices
10027    // are not in the v7.x roadmap).
10028    if !is_self_ref {
10029        let parent_table = catalog.get(&fk.parent_table).expect("checked above");
10030        let primary_parent_col = parent_columns[0];
10031        let has_btree = parent_table
10032            .schema()
10033            .columns
10034            .get(primary_parent_col)
10035            .is_some()
10036            && parent_table.indices().iter().any(|idx| {
10037                matches!(idx.kind, spg_storage::IndexKind::BTree(_))
10038                    && idx.column_position == primary_parent_col
10039                    && idx.partial_predicate.is_none()
10040            });
10041        if !has_btree {
10042            return Err(EngineError::Unsupported(alloc::format!(
10043                "FOREIGN KEY parent column on {:?} is not covered by an unconditional BTree \
10044                 index — create one with `CREATE INDEX ... ON {} ({})` first",
10045                parent_table_str,
10046                parent_table_str,
10047                parent_table.schema().columns[primary_parent_col].name,
10048            )));
10049        }
10050    }
10051    let on_delete = fk_action_sql_to_storage(fk.on_delete);
10052    let on_update = fk_action_sql_to_storage(fk.on_update);
10053    Ok(spg_storage::ForeignKeyConstraint {
10054        name: fk.name,
10055        local_columns,
10056        parent_table: fk.parent_table,
10057        parent_columns,
10058        on_delete,
10059        on_update,
10060    })
10061}
10062
10063/// v7.6.1 — pick a sentinel "primary key" column from the parent
10064/// table when the FK didn't name parent columns. Picks the first
10065/// single-column unconditional BTree index — that's the closest
10066/// thing SPG has to a PRIMARY KEY today. Self-referencing FKs use
10067/// `local_cols` as the column source.
10068fn pick_pk_index_column(
10069    catalog: &Catalog,
10070    parent_name: &str,
10071    is_self_ref: bool,
10072    local_cols: &[ColumnSchema],
10073) -> Option<usize> {
10074    if is_self_ref {
10075        // Self-ref FK omitted parent columns: pick column 0 by
10076        // convention (no catalog entry yet). Engine will widen this
10077        // when v7.6.7 lands; v7.6.1 only handles the explicit form.
10078        let _ = local_cols;
10079        return Some(0);
10080    }
10081    let parent = catalog.get(parent_name)?;
10082    parent.indices().iter().find_map(|idx| {
10083        if matches!(idx.kind, spg_storage::IndexKind::BTree(_))
10084            && idx.partial_predicate.is_none()
10085            && idx.included_columns.is_empty()
10086            && idx.expression.is_none()
10087        {
10088            Some(idx.column_position)
10089        } else {
10090            None
10091        }
10092    })
10093}
10094
10095/// v7.9.8 / v7.9.10 — resolve the column positions that
10096/// identify a conflict for ON CONFLICT. Returns a Vec of
10097/// column positions (1 element for single-column form, N for
10098/// composite). When the user wrote bare `ON CONFLICT DO …`,
10099/// falls back to the table's first unconditional BTree index
10100/// (always single-column today).
10101fn resolve_on_conflict_columns(
10102    catalog: &Catalog,
10103    table_name: &str,
10104    target: &[String],
10105) -> Result<Vec<usize>, EngineError> {
10106    let table = catalog.get(table_name).ok_or_else(|| {
10107        EngineError::Storage(StorageError::TableNotFound {
10108            name: table_name.into(),
10109        })
10110    })?;
10111    if target.is_empty() {
10112        // v7.13.2 — mailrs round-6 S5 follow-up. Composite UNIQUE
10113        // constraints carry a multi-column tuple; the prior code
10114        // path picked only the leading column of the first BTree
10115        // index, which caused `ON CONFLICT DO NOTHING` to dedup
10116        // by leading column alone (3 rows with same group_id but
10117        // different permission collapsed to 1). PG semantics use
10118        // the full tuple. Prefer a UniquenessConstraint's full
10119        // column list when one exists; fall back to the leading
10120        // BTree column for legacy single-column UNIQUE.
10121        if let Some(uc) = table.schema().uniqueness_constraints.first() {
10122            return Ok(uc.columns.clone());
10123        }
10124        let pos = table
10125            .indices()
10126            .iter()
10127            .find_map(|idx| {
10128                if matches!(idx.kind, spg_storage::IndexKind::BTree(_))
10129                    && idx.partial_predicate.is_none()
10130                    && idx.included_columns.is_empty()
10131                    && idx.expression.is_none()
10132                {
10133                    Some(idx.column_position)
10134                } else {
10135                    None
10136                }
10137            })
10138            .ok_or_else(|| {
10139                EngineError::Unsupported(alloc::format!(
10140                    "ON CONFLICT without target requires a UNIQUE BTree index on {table_name:?}"
10141                ))
10142            })?;
10143        return Ok(alloc::vec![pos]);
10144    }
10145    let mut out = Vec::with_capacity(target.len());
10146    for name in target {
10147        let pos = table
10148            .schema()
10149            .columns
10150            .iter()
10151            .position(|c| c.name == *name)
10152            .ok_or_else(|| {
10153                EngineError::Unsupported(alloc::format!(
10154                    "ON CONFLICT target column {name:?} not found on {table_name:?}"
10155                ))
10156            })?;
10157        out.push(pos);
10158    }
10159    Ok(out)
10160}
10161
10162/// v7.9.8 — check whether the BTree index on `column_pos` of
10163/// `table_name` already has a row with this key.
10164fn on_conflict_key_exists(
10165    catalog: &Catalog,
10166    table_name: &str,
10167    column_pos: usize,
10168    key: &Value,
10169) -> bool {
10170    let Some(table) = catalog.get(table_name) else {
10171        return false;
10172    };
10173    let Some(idx_key) = spg_storage::IndexKey::from_value(key) else {
10174        return false;
10175    };
10176    table.indices().iter().any(|idx| {
10177        matches!(idx.kind, spg_storage::IndexKind::BTree(_))
10178            && idx.column_position == column_pos
10179            && idx.partial_predicate.is_none()
10180            && !idx.lookup_eq(&idx_key).is_empty()
10181    })
10182}
10183
10184/// v7.9.9 / v7.9.10 — look up an existing row's position by
10185/// matching all `column_positions` against the incoming `key`
10186/// tuple. Single-column shape (one column) reduces to the
10187/// canonical PK lookup; composite shapes scan linearly until
10188/// every position matches.
10189fn lookup_row_position_by_keys(
10190    catalog: &Catalog,
10191    table_name: &str,
10192    column_positions: &[usize],
10193    key: &[&Value],
10194) -> Option<usize> {
10195    let table = catalog.get(table_name)?;
10196    table.rows().iter().position(|r| {
10197        column_positions
10198            .iter()
10199            .enumerate()
10200            .all(|(i, &pos)| r.values.get(pos) == Some(key[i]))
10201    })
10202}
10203
10204/// v7.9.10 — does the table already contain a row whose
10205/// `column_positions` tuple equals `key`? Single-column shape
10206/// uses the existing BTree fast path; composite shapes fall
10207/// back to a row scan.
10208fn on_conflict_keys_exist(
10209    catalog: &Catalog,
10210    table_name: &str,
10211    column_positions: &[usize],
10212    key: &[&Value],
10213) -> bool {
10214    if column_positions.len() == 1 {
10215        return on_conflict_key_exists(catalog, table_name, column_positions[0], key[0]);
10216    }
10217    let Some(table) = catalog.get(table_name) else {
10218        return false;
10219    };
10220    table.rows().iter().any(|r| {
10221        column_positions
10222            .iter()
10223            .enumerate()
10224            .all(|(i, &pos)| r.values.get(pos) == Some(key[i]))
10225    })
10226}
10227
10228/// v7.9.9 — apply ON CONFLICT DO UPDATE SET assignments to an
10229/// existing row.
10230///
10231/// `incoming` is the rejected INSERT row (used to resolve
10232/// `EXCLUDED.col` references in the assignment exprs);
10233/// `target_pos` is the position of the existing row in the table.
10234/// Each assignment substitutes `EXCLUDED.col` with the matching
10235/// incoming value, evaluates the resulting expression against
10236/// the existing row, and writes the new value into the
10237/// corresponding column of the returned `Vec<Value>`. If
10238/// `where_` evaluates falsy, returns Ok(None) — PG behaviour:
10239/// the conflicting row is silently kept unchanged.
10240fn apply_on_conflict_assignments(
10241    catalog: &Catalog,
10242    table_name: &str,
10243    target_pos: usize,
10244    incoming: &[Value],
10245    assignments: &[(String, Expr)],
10246    where_: Option<&Expr>,
10247) -> Result<Option<Vec<Value>>, EngineError> {
10248    let table = catalog.get(table_name).ok_or_else(|| {
10249        EngineError::Storage(StorageError::TableNotFound {
10250            name: table_name.into(),
10251        })
10252    })?;
10253    let schema_cols = table.schema().columns.clone();
10254    let existing = table
10255        .rows()
10256        .get(target_pos)
10257        .ok_or_else(|| {
10258            EngineError::Unsupported(alloc::format!(
10259                "ON CONFLICT DO UPDATE: row position {target_pos} out of bounds on {table_name:?}"
10260            ))
10261        })?
10262        .clone();
10263    let ctx = eval::EvalContext::new(&schema_cols, Some(table_name));
10264    // Optional WHERE filter on the conflict row.
10265    if let Some(w) = where_ {
10266        let pred = w.clone();
10267        let pred = substitute_excluded_refs(pred, &schema_cols, incoming);
10268        let v = eval::eval_expr(&pred, &existing, &ctx)?;
10269        if !matches!(v, Value::Bool(true)) {
10270            return Ok(None);
10271        }
10272    }
10273    let mut new_values = existing.values.clone();
10274    for (col_name, expr) in assignments {
10275        let target_idx = schema_cols
10276            .iter()
10277            .position(|c| c.name == *col_name)
10278            .ok_or_else(|| {
10279                EngineError::Eval(EvalError::ColumnNotFound {
10280                    name: col_name.clone(),
10281                })
10282            })?;
10283        let sub = substitute_excluded_refs(expr.clone(), &schema_cols, incoming);
10284        let v = eval::eval_expr(&sub, &existing, &ctx)?;
10285        new_values[target_idx] = coerce_value(v, schema_cols[target_idx].ty, col_name, target_idx)?;
10286    }
10287    Ok(Some(new_values))
10288}
10289
10290/// v7.9.9 — walk an `Expr` tree replacing any `Column { qualifier:
10291/// "EXCLUDED", name }` reference with a `Literal` of the matching
10292/// value from the incoming-row vec. Resolution against the
10293/// child-table column list (by name).
10294fn substitute_excluded_refs(expr: Expr, schema_cols: &[ColumnSchema], incoming: &[Value]) -> Expr {
10295    use spg_sql::ast::ColumnName;
10296    match expr {
10297        Expr::Column(ColumnName { qualifier, name })
10298            if qualifier
10299                .as_deref()
10300                .is_some_and(|q| q.eq_ignore_ascii_case("excluded")) =>
10301        {
10302            let pos = schema_cols.iter().position(|c| c.name == name);
10303            match pos {
10304                Some(p) => {
10305                    let v = incoming.get(p).cloned().unwrap_or(Value::Null);
10306                    value_to_literal_expr(v)
10307                        .unwrap_or_else(|_| Expr::Literal(spg_sql::ast::Literal::Null))
10308                }
10309                None => Expr::Column(ColumnName { qualifier, name }),
10310            }
10311        }
10312        Expr::Binary { op, lhs, rhs } => Expr::Binary {
10313            op,
10314            lhs: Box::new(substitute_excluded_refs(*lhs, schema_cols, incoming)),
10315            rhs: Box::new(substitute_excluded_refs(*rhs, schema_cols, incoming)),
10316        },
10317        Expr::Unary { op, expr } => Expr::Unary {
10318            op,
10319            expr: Box::new(substitute_excluded_refs(*expr, schema_cols, incoming)),
10320        },
10321        Expr::FunctionCall { name, args } => Expr::FunctionCall {
10322            name,
10323            args: args
10324                .into_iter()
10325                .map(|a| substitute_excluded_refs(a, schema_cols, incoming))
10326                .collect(),
10327        },
10328        other => other,
10329    }
10330}
10331
10332/// v7.6.2 / v7.6.7 — INSERT-side FK enforcement. For every row
10333/// about to be inserted into `child_table`, every FK declared on
10334/// that table is checked: the row's FK columns must either be
10335/// NULL (SQL spec skip) or match an existing parent row via the
10336/// parent's BTree PK / UNIQUE index.
10337///
10338/// Returns `EngineError::Unsupported` with a `FOREIGN KEY violation`
10339/// payload on first failure.
10340///
10341/// **Self-referencing FKs (v7.6.7 widening):** when `fk.parent_table
10342/// == child_table`, the parent rows visible to this check are
10343///  (a) rows already committed to the table, plus
10344///  (b) earlier rows from the *same* `rows` batch.
10345/// This makes `INSERT INTO tree VALUES (1, NULL), (2, 1), (3, 2)`
10346/// work in a single statement — common pattern for bulk-loading
10347/// hierarchies.
10348/// v7.9.19 — enforce table-level UNIQUE / PRIMARY KEY tuple
10349/// constraints at INSERT time. For each constraint declared on
10350/// the target table, check that no existing row + no earlier row
10351/// in the same batch has the same full-column tuple. NULL in
10352/// any column lifts the row out of the check (SQL spec: NULL
10353/// ≠ NULL for uniqueness). mailrs G1 + G6.
10354fn enforce_uniqueness_inserts(
10355    catalog: &Catalog,
10356    child_table: &str,
10357    constraints: &[spg_storage::UniquenessConstraint],
10358    rows: &[Vec<Value>],
10359) -> Result<(), EngineError> {
10360    if constraints.is_empty() {
10361        return Ok(());
10362    }
10363    let table = catalog.get(child_table).ok_or_else(|| {
10364        EngineError::Storage(StorageError::TableNotFound {
10365            name: child_table.into(),
10366        })
10367    })?;
10368    for uc in constraints {
10369        for (batch_idx, row_values) in rows.iter().enumerate() {
10370            let key: Vec<&Value> = uc.columns.iter().map(|&i| &row_values[i]).collect();
10371            let has_null = key.iter().any(|v| matches!(v, Value::Null));
10372            // v7.13.0 — `NULLS NOT DISTINCT` (mailrs round-5 G10,
10373            // PG 15+): two rows whose constrained columns are all
10374            // NULL collide. SQL-standard `NULLS DISTINCT` lets any
10375            // NULL skip the check.
10376            if has_null && !uc.nulls_not_distinct {
10377                continue;
10378            }
10379            // Table-side collision: scan existing rows.
10380            let collides_in_table = table.rows().iter().any(|prow| {
10381                uc.columns
10382                    .iter()
10383                    .enumerate()
10384                    .all(|(i, &p)| prow.values.get(p) == Some(key[i]))
10385            });
10386            // Batch-side collision: earlier rows in the same INSERT.
10387            let collides_in_batch = rows[..batch_idx].iter().any(|earlier| {
10388                uc.columns
10389                    .iter()
10390                    .enumerate()
10391                    .all(|(i, &p)| earlier.get(p) == Some(key[i]))
10392            });
10393            if collides_in_table || collides_in_batch {
10394                let kind = if uc.is_primary_key {
10395                    "PRIMARY KEY"
10396                } else {
10397                    "UNIQUE"
10398                };
10399                let col_names: Vec<String> = uc
10400                    .columns
10401                    .iter()
10402                    .map(|&i| table.schema().columns[i].name.clone())
10403                    .collect();
10404                return Err(EngineError::Unsupported(alloc::format!(
10405                    "{kind} violation on {child_table:?} columns {col_names:?}: \
10406                     row #{batch_idx} duplicates an existing key"
10407                )));
10408            }
10409        }
10410    }
10411    Ok(())
10412}
10413
10414/// v7.9.29 — `true` iff `v` counts as a truthy SQL value for a
10415/// WHERE-style predicate. NULL → false (three-valued logic
10416/// collapses to "skip this row" for index inclusion). Numeric
10417/// non-zero, BIGINT non-zero, TINYINT non-zero, BOOLEAN true → true.
10418/// Everything else (strings, vectors, JSON, …) is not a valid
10419/// predicate result and surfaces as `false` so a malformed
10420/// predicate degrades to "row not in index" rather than panicking.
10421fn predicate_truthy(v: &spg_storage::Value) -> bool {
10422    use spg_storage::Value as V;
10423    match v {
10424        V::Bool(b) => *b,
10425        V::Int(n) => *n != 0,
10426        V::BigInt(n) => *n != 0,
10427        V::SmallInt(n) => *n != 0,
10428        _ => false,
10429    }
10430}
10431
10432/// v7.9.29 — at CREATE UNIQUE INDEX time, scan the table's
10433/// committed rows for pre-existing duplicates. If any pair of rows
10434/// matches the predicate AND has the same index key, refuse to
10435/// create the index so the user fixes the data before retrying.
10436fn check_existing_unique_violation(
10437    idx: &spg_storage::Index,
10438    schema: &spg_storage::TableSchema,
10439    rows: &[spg_storage::Row],
10440) -> Result<(), EngineError> {
10441    let predicate_expr = match idx.partial_predicate.as_deref() {
10442        Some(s) => Some(spg_sql::parser::parse_expression(s).map_err(|e| {
10443            EngineError::Unsupported(alloc::format!(
10444                "stored partial predicate {s:?} failed to re-parse: {e:?}"
10445            ))
10446        })?),
10447        None => None,
10448    };
10449    let ctx = eval::EvalContext::new(&schema.columns, None);
10450    let key_positions = unique_key_positions(idx);
10451    let mut seen: alloc::vec::Vec<alloc::vec::Vec<spg_storage::Value>> = alloc::vec::Vec::new();
10452    for row in rows {
10453        if let Some(expr) = &predicate_expr {
10454            let v = eval::eval_expr(expr, row, &ctx).map_err(|e| {
10455                EngineError::Unsupported(alloc::format!(
10456                    "evaluating UNIQUE INDEX predicate against existing row: {e:?}"
10457                ))
10458            })?;
10459            if !predicate_truthy(&v) {
10460                continue;
10461            }
10462        }
10463        let key: alloc::vec::Vec<spg_storage::Value> = key_positions
10464            .iter()
10465            .map(|&p| {
10466                row.values
10467                    .get(p)
10468                    .cloned()
10469                    .unwrap_or(spg_storage::Value::Null)
10470            })
10471            .collect();
10472        if key.iter().any(|v| matches!(v, spg_storage::Value::Null)) {
10473            continue;
10474        }
10475        if seen.iter().any(|other| *other == key) {
10476            return Err(EngineError::Unsupported(alloc::format!(
10477                "CREATE UNIQUE INDEX {:?}: existing rows already violate the constraint",
10478                idx.name
10479            )));
10480        }
10481        seen.push(key);
10482    }
10483    Ok(())
10484}
10485
10486/// v7.9.29 — full key tuple for a UNIQUE INDEX (leading +
10487/// extra positions). For single-column indexes this is just
10488/// `[column_position]`.
10489fn unique_key_positions(idx: &spg_storage::Index) -> alloc::vec::Vec<usize> {
10490    let mut out = alloc::vec::Vec::with_capacity(1 + idx.extra_column_positions.len());
10491    out.push(idx.column_position);
10492    out.extend_from_slice(&idx.extra_column_positions);
10493    out
10494}
10495
10496/// v7.9.29 — at INSERT time, walk every `is_unique` index on the
10497/// target table. For each, eval the index's optional predicate
10498/// against (a) the candidate row and (b) every committed row plus
10499/// earlier batch rows; only rows where the predicate is truthy
10500/// participate. A duplicate key among predicate-matching rows is a
10501/// uniqueness violation. NULL keys lift the row out of the check
10502/// (matching PG's "UNIQUE allows multiple NULLs" semantics).
10503fn enforce_unique_index_inserts(
10504    catalog: &Catalog,
10505    table_name: &str,
10506    rows: &[alloc::vec::Vec<spg_storage::Value>],
10507) -> Result<(), EngineError> {
10508    let table = catalog.get(table_name).ok_or_else(|| {
10509        EngineError::Storage(StorageError::TableNotFound {
10510            name: table_name.into(),
10511        })
10512    })?;
10513    let schema = table.schema();
10514    let ctx = eval::EvalContext::new(&schema.columns, None);
10515    for idx in table.indices() {
10516        if !idx.is_unique {
10517            continue;
10518        }
10519        // Re-parse the predicate once per index per batch.
10520        let predicate_expr = match idx.partial_predicate.as_deref() {
10521            Some(s) => Some(spg_sql::parser::parse_expression(s).map_err(|e| {
10522                EngineError::Unsupported(alloc::format!(
10523                    "UNIQUE INDEX {:?} predicate {s:?} failed to re-parse: {e:?}",
10524                    idx.name
10525                ))
10526            })?),
10527            None => None,
10528        };
10529        let key_positions = unique_key_positions(idx);
10530        let key_of = |values: &[spg_storage::Value]| -> alloc::vec::Vec<spg_storage::Value> {
10531            key_positions
10532                .iter()
10533                .map(|&p| values.get(p).cloned().unwrap_or(spg_storage::Value::Null))
10534                .collect()
10535        };
10536        // Helper: does `values` participate in this index? (predicate
10537        // truthy when present.) Wraps `values` into a transient Row
10538        // because eval_expr requires &Row.
10539        let participates = |values: &[spg_storage::Value]| -> Result<bool, EngineError> {
10540            let Some(expr) = &predicate_expr else {
10541                return Ok(true);
10542            };
10543            let tmp_row = spg_storage::Row {
10544                values: values.to_vec(),
10545            };
10546            let v = eval::eval_expr(expr, &tmp_row, &ctx).map_err(|e| {
10547                EngineError::Unsupported(alloc::format!(
10548                    "UNIQUE INDEX {:?} predicate eval: {e:?}",
10549                    idx.name
10550                ))
10551            })?;
10552            Ok(predicate_truthy(&v))
10553        };
10554        for (batch_idx, row_values) in rows.iter().enumerate() {
10555            if !participates(row_values)? {
10556                continue;
10557            }
10558            let key = key_of(row_values);
10559            if key.iter().any(|v| matches!(v, spg_storage::Value::Null)) {
10560                continue;
10561            }
10562            // Committed-table collision.
10563            for prow in table.rows() {
10564                if !participates(&prow.values)? {
10565                    continue;
10566                }
10567                if key_of(&prow.values) == key {
10568                    return Err(EngineError::Unsupported(alloc::format!(
10569                        "UNIQUE INDEX {:?} violation on {table_name:?}: \
10570                         row #{batch_idx} duplicates an existing key",
10571                        idx.name
10572                    )));
10573                }
10574            }
10575            // Within-batch collision: earlier rows in the same INSERT.
10576            for earlier in &rows[..batch_idx] {
10577                if !participates(earlier)? {
10578                    continue;
10579                }
10580                if key_of(earlier) == key {
10581                    return Err(EngineError::Unsupported(alloc::format!(
10582                        "UNIQUE INDEX {:?} violation on {table_name:?}: \
10583                         row #{batch_idx} duplicates an earlier row in the same batch",
10584                        idx.name
10585                    )));
10586                }
10587            }
10588        }
10589    }
10590    Ok(())
10591}
10592
10593/// v7.13.0 — `UPDATE OF cols` filter helper (mailrs round-5 G7).
10594/// Returns `true` when at least one of `filter_cols` has a
10595/// different value in `new_row` vs `old_row`. Column lookup is
10596/// case-insensitive against `schema_cols`; unknown filter columns
10597/// are treated as "not changed" (the trigger therefore won't
10598/// fire on them — surfacing a parse-time error would be too
10599/// strict for catalog reloads where the schema may have drifted).
10600fn any_column_changed(
10601    filter_cols: &[String],
10602    schema_cols: &[ColumnSchema],
10603    old_row: &Row,
10604    new_row: &Row,
10605) -> bool {
10606    for col_name in filter_cols {
10607        let Some(pos) = schema_cols
10608            .iter()
10609            .position(|c| c.name.eq_ignore_ascii_case(col_name))
10610        else {
10611            continue;
10612        };
10613        let old_v = old_row.values.get(pos);
10614        let new_v = new_row.values.get(pos);
10615        if old_v != new_v {
10616            return true;
10617        }
10618    }
10619    false
10620}
10621
10622/// v7.13.0 — evaluate every CHECK predicate on the schema against
10623/// each candidate row. Mirrors PG semantics: a `false` result
10624/// rejects the mutation; a NULL result *passes* (CHECK rejects
10625/// only on definite-false, not on unknown). mailrs round-5 G3.
10626fn enforce_check_constraints(
10627    catalog: &Catalog,
10628    table_name: &str,
10629    rows: &[alloc::vec::Vec<spg_storage::Value>],
10630) -> Result<(), EngineError> {
10631    let table = catalog.get(table_name).ok_or_else(|| {
10632        EngineError::Storage(StorageError::TableNotFound {
10633            name: table_name.into(),
10634        })
10635    })?;
10636    let schema = table.schema();
10637    if schema.checks.is_empty() {
10638        return Ok(());
10639    }
10640    let ctx = eval::EvalContext::new(&schema.columns, None);
10641    let mut parsed: alloc::vec::Vec<(usize, Expr)> = alloc::vec::Vec::new();
10642    for (i, src) in schema.checks.iter().enumerate() {
10643        let expr = spg_sql::parser::parse_expression(src).map_err(|e| {
10644            EngineError::Unsupported(alloc::format!(
10645                "CHECK constraint #{i} on {table_name:?} ({src:?}) failed to re-parse: {e:?}"
10646            ))
10647        })?;
10648        parsed.push((i, expr));
10649    }
10650    for (batch_idx, row_values) in rows.iter().enumerate() {
10651        let tmp_row = spg_storage::Row {
10652            values: row_values.clone(),
10653        };
10654        for (i, expr) in &parsed {
10655            let v = eval::eval_expr(expr, &tmp_row, &ctx).map_err(|e| {
10656                EngineError::Unsupported(alloc::format!(
10657                    "CHECK constraint #{i} on {table_name:?} eval at row #{batch_idx}: {e:?}"
10658                ))
10659            })?;
10660            // PG: NULL passes (CHECK rejects on definite-false only).
10661            if matches!(v, spg_storage::Value::Bool(false)) {
10662                return Err(EngineError::Unsupported(alloc::format!(
10663                    "CHECK constraint violation on {table_name:?} (row #{batch_idx}): {:?}",
10664                    schema.checks[*i]
10665                )));
10666            }
10667        }
10668    }
10669    Ok(())
10670}
10671
10672fn enforce_fk_inserts(
10673    catalog: &Catalog,
10674    child_table: &str,
10675    fks: &[spg_storage::ForeignKeyConstraint],
10676    rows: &[Vec<Value>],
10677) -> Result<(), EngineError> {
10678    for fk in fks {
10679        let parent_is_self = fk.parent_table == child_table;
10680        let parent = if parent_is_self {
10681            // Self-ref: read the current state of the same table.
10682            // The mut borrow on child has been dropped by the caller.
10683            catalog.get(child_table).ok_or_else(|| {
10684                EngineError::Storage(StorageError::TableNotFound {
10685                    name: child_table.into(),
10686                })
10687            })?
10688        } else {
10689            catalog.get(&fk.parent_table).ok_or_else(|| {
10690                EngineError::Storage(StorageError::TableNotFound {
10691                    name: fk.parent_table.clone(),
10692                })
10693            })?
10694        };
10695        for (batch_idx, row_values) in rows.iter().enumerate() {
10696            // Single-column FK fast path: try the parent's BTree
10697            // index for an O(log n) lookup. Composite FKs fall back
10698            // to a parent-row scan.
10699            if fk.local_columns.len() == 1 {
10700                let v = &row_values[fk.local_columns[0]];
10701                if matches!(v, Value::Null) {
10702                    continue;
10703                }
10704                let parent_col = fk.parent_columns[0];
10705                let key = spg_storage::IndexKey::from_value(v).ok_or_else(|| {
10706                    EngineError::Unsupported(alloc::format!(
10707                        "FOREIGN KEY column value of type {:?} is not index-eligible",
10708                        v.data_type()
10709                    ))
10710                })?;
10711                let present_committed = parent.indices().iter().any(|idx| {
10712                    matches!(idx.kind, spg_storage::IndexKind::BTree(_))
10713                        && idx.column_position == parent_col
10714                        && idx.partial_predicate.is_none()
10715                        && !idx.lookup_eq(&key).is_empty()
10716                });
10717                // v7.6.7 self-ref widening: also accept a match
10718                // against earlier rows in this same batch when the
10719                // FK points at the table being inserted into.
10720                let present_in_batch = parent_is_self
10721                    && rows[..batch_idx]
10722                        .iter()
10723                        .any(|earlier| earlier.get(parent_col) == Some(v));
10724                if !(present_committed || present_in_batch) {
10725                    return Err(EngineError::Unsupported(alloc::format!(
10726                        "FOREIGN KEY violation: no parent row in {:?} where {} = {:?}",
10727                        fk.parent_table,
10728                        parent
10729                            .schema()
10730                            .columns
10731                            .get(parent_col)
10732                            .map_or("?", |c| c.name.as_str()),
10733                        v,
10734                    )));
10735                }
10736            } else {
10737                // Composite FK: scan parent rows. v7.6.7 also
10738                // accepts a match against earlier rows in the same
10739                // batch (self-ref bulk-loading of hierarchies).
10740                if fk
10741                    .local_columns
10742                    .iter()
10743                    .all(|&i| matches!(row_values.get(i), Some(Value::Null)))
10744                {
10745                    continue;
10746                }
10747                let local: Vec<&Value> = fk.local_columns.iter().map(|&i| &row_values[i]).collect();
10748                let parent_match_committed = parent.rows().iter().any(|prow| {
10749                    fk.parent_columns
10750                        .iter()
10751                        .enumerate()
10752                        .all(|(i, &pi)| prow.values.get(pi) == Some(local[i]))
10753                });
10754                let parent_match_in_batch = parent_is_self
10755                    && rows[..batch_idx].iter().any(|earlier| {
10756                        fk.parent_columns
10757                            .iter()
10758                            .enumerate()
10759                            .all(|(i, &pi)| earlier.get(pi) == Some(local[i]))
10760                    });
10761                if !(parent_match_committed || parent_match_in_batch) {
10762                    return Err(EngineError::Unsupported(alloc::format!(
10763                        "FOREIGN KEY violation: no parent row in {:?} matching composite key",
10764                        fk.parent_table,
10765                    )));
10766                }
10767            }
10768        }
10769    }
10770    Ok(())
10771}
10772
/// v7.6.4 / v7.6.5 — one step of the FK action plan computed for a
/// DELETE on a parent. The plan is a list of these steps, stacked
/// across the FK graph by `plan_fk_parent_deletions`.
///
/// `plan_fk_parent_updates` returns the same step type for UPDATEs
/// that change a referenced parent key.
#[derive(Debug, Clone)]
struct FkChildStep {
    /// Name of the table this step mutates; `apply_fk_child_step`
    /// looks the table up in the catalog by this name.
    child_table: String,
    /// The mutation to perform on that table.
    action: FkChildAction,
}
10781
/// The mutation carried by one `FkChildStep`.
///
/// In `SetNull` and `SetDefault` the vectors are parallel: entry
/// `i` of each vector describes the same child cell (row position,
/// column position and, for `SetDefault`, the value to write).
#[derive(Debug, Clone)]
enum FkChildAction {
    /// CASCADE — remove these rows. Sorted, deduplicated positions.
    Delete { positions: Vec<usize> },
    /// SET NULL — for each (row, column) in the flat list, write
    /// NULL into that child cell. Multiple FKs on the same row may
    /// produce overlapping entries (deduped at plan time).
    SetNull {
        positions: Vec<usize>,
        columns: Vec<usize>,
    },
    /// SET DEFAULT — same shape as SetNull but writes the column's
    /// declared DEFAULT value (resolved at plan time). Columns
    /// without a DEFAULT raise an error during planning.
    ///
    /// `plan_fk_parent_updates` also reuses this variant for
    /// ON UPDATE CASCADE, with the new parent key values in
    /// `defaults`.
    SetDefault {
        positions: Vec<usize>,
        columns: Vec<usize>,
        defaults: Vec<Value>,
    },
}
10802
10803/// v7.6.3 → v7.6.5 — plan FK fallout for a DELETE on a parent table.
10804///
10805/// Walks every table in the catalog looking for FKs whose
10806/// `parent_table` is `parent_table_name`. For each such FK + each
10807/// to-be-deleted parent row:
10808///
10809///   - RESTRICT / NoAction → error, no plan returned
10810///   - CASCADE → child rows get scheduled for deletion; recursive
10811///   - SetNull → child FK column(s) scheduled to be NULL-ed.
10812///     Verified NULL-able at plan time.
10813///   - SetDefault → child FK column(s) scheduled to be reset to
10814///     their declared DEFAULT. Columns without a DEFAULT raise.
10815///
10816/// SET NULL / SET DEFAULT do NOT cascade further — the child row
10817/// stays; only one of its columns mutates.
10818fn plan_fk_parent_deletions(
10819    catalog: &Catalog,
10820    parent_table_name: &str,
10821    to_delete_positions: &[usize],
10822    to_delete_rows: &[Vec<Value>],
10823) -> Result<Vec<FkChildStep>, EngineError> {
10824    use alloc::collections::{BTreeMap, BTreeSet};
10825    if to_delete_rows.is_empty() {
10826        return Ok(Vec::new());
10827    }
10828    let mut delete_plan: BTreeMap<String, BTreeSet<usize>> = BTreeMap::new();
10829    // setnull / setdefault keyed by child_table → (row_idx, col_idx) → optional default
10830    let mut setnull_plan: BTreeMap<String, BTreeSet<(usize, usize)>> = BTreeMap::new();
10831    let mut setdefault_plan: BTreeMap<String, BTreeMap<(usize, usize), Value>> = BTreeMap::new();
10832    let mut visited: BTreeSet<(String, usize)> = BTreeSet::new();
10833    for &p in to_delete_positions {
10834        visited.insert((parent_table_name.to_string(), p));
10835    }
10836    let mut work: Vec<(String, Vec<Value>)> = to_delete_rows
10837        .iter()
10838        .map(|r| (parent_table_name.to_string(), r.clone()))
10839        .collect();
10840    while let Some((cur_parent, parent_row)) = work.pop() {
10841        for child_name in catalog.table_names() {
10842            let child = catalog
10843                .get(&child_name)
10844                .expect("table_names → catalog.get round-trip is total");
10845            for fk in &child.schema().foreign_keys {
10846                if fk.parent_table != cur_parent {
10847                    continue;
10848                }
10849                let parent_key: Vec<&Value> = fk
10850                    .parent_columns
10851                    .iter()
10852                    .map(|&pi| &parent_row[pi])
10853                    .collect();
10854                if parent_key.iter().any(|v| matches!(v, Value::Null)) {
10855                    continue;
10856                }
10857                for (child_row_idx, child_row) in child.rows().iter().enumerate() {
10858                    if child_name == cur_parent
10859                        && visited.contains(&(child_name.clone(), child_row_idx))
10860                    {
10861                        continue;
10862                    }
10863                    let matches_key = fk
10864                        .local_columns
10865                        .iter()
10866                        .enumerate()
10867                        .all(|(i, &li)| child_row.values.get(li) == Some(parent_key[i]));
10868                    if !matches_key {
10869                        continue;
10870                    }
10871                    match fk.on_delete {
10872                        spg_storage::FkAction::Restrict | spg_storage::FkAction::NoAction => {
10873                            return Err(EngineError::Unsupported(alloc::format!(
10874                                "FOREIGN KEY violation: DELETE on {cur_parent:?} is \
10875                                 restricted by FK from {child_name:?}.{:?}",
10876                                fk.local_columns,
10877                            )));
10878                        }
10879                        spg_storage::FkAction::Cascade => {
10880                            if visited.insert((child_name.clone(), child_row_idx)) {
10881                                delete_plan
10882                                    .entry(child_name.clone())
10883                                    .or_default()
10884                                    .insert(child_row_idx);
10885                                work.push((child_name.clone(), child_row.values.clone()));
10886                            }
10887                        }
10888                        spg_storage::FkAction::SetNull => {
10889                            // Verify every local FK column is NULL-able.
10890                            for &li in &fk.local_columns {
10891                                let col = child.schema().columns.get(li).ok_or_else(|| {
10892                                    EngineError::Unsupported(alloc::format!(
10893                                        "FK local column {li} missing in {child_name:?}"
10894                                    ))
10895                                })?;
10896                                if !col.nullable {
10897                                    return Err(EngineError::Unsupported(alloc::format!(
10898                                        "FOREIGN KEY ON DELETE SET NULL: column \
10899                                         {child_name:?}.{:?} is NOT NULL — cannot SET NULL",
10900                                        col.name,
10901                                    )));
10902                                }
10903                            }
10904                            let entry = setnull_plan.entry(child_name.clone()).or_default();
10905                            for &li in &fk.local_columns {
10906                                entry.insert((child_row_idx, li));
10907                            }
10908                        }
10909                        spg_storage::FkAction::SetDefault => {
10910                            // Resolve the DEFAULT for every local FK col.
10911                            let entry = setdefault_plan.entry(child_name.clone()).or_default();
10912                            for &li in &fk.local_columns {
10913                                let col = child.schema().columns.get(li).ok_or_else(|| {
10914                                    EngineError::Unsupported(alloc::format!(
10915                                        "FK local column {li} missing in {child_name:?}"
10916                                    ))
10917                                })?;
10918                                let default = col.default.clone().ok_or_else(|| {
10919                                    EngineError::Unsupported(alloc::format!(
10920                                        "FOREIGN KEY ON DELETE SET DEFAULT: column \
10921                                         {child_name:?}.{:?} has no DEFAULT declared",
10922                                        col.name,
10923                                    ))
10924                                })?;
10925                                entry.insert((child_row_idx, li), default);
10926                            }
10927                        }
10928                    }
10929                }
10930            }
10931        }
10932    }
10933    // Flatten the three plans into the ordered `FkChildStep` list.
10934    // Deletes are applied last per child (after any null/default
10935    // re-writes on the same child) so a child row that's both
10936    // re-written and then cascade-deleted only ends up deleted —
10937    // but in v7.6.5 SetNull/Cascade never overlap on the same row
10938    // (a single FK chooses exactly one action), so the order is
10939    // mostly a precaution.
10940    let mut steps: Vec<FkChildStep> = Vec::new();
10941    for (child_table, entries) in setnull_plan {
10942        let (positions, columns): (Vec<usize>, Vec<usize>) = entries.into_iter().unzip();
10943        steps.push(FkChildStep {
10944            child_table,
10945            action: FkChildAction::SetNull { positions, columns },
10946        });
10947    }
10948    for (child_table, entries) in setdefault_plan {
10949        let mut positions = Vec::with_capacity(entries.len());
10950        let mut columns = Vec::with_capacity(entries.len());
10951        let mut defaults = Vec::with_capacity(entries.len());
10952        for ((p, c), v) in entries {
10953            positions.push(p);
10954            columns.push(c);
10955            defaults.push(v);
10956        }
10957        steps.push(FkChildStep {
10958            child_table,
10959            action: FkChildAction::SetDefault {
10960                positions,
10961                columns,
10962                defaults,
10963            },
10964        });
10965    }
10966    for (child_table, positions) in delete_plan {
10967        steps.push(FkChildStep {
10968            child_table,
10969            action: FkChildAction::Delete {
10970                positions: positions.into_iter().collect(),
10971            },
10972        });
10973    }
10974    Ok(steps)
10975}
10976
10977/// v7.6.6 — plan FK fallout for an UPDATE that mutates parent-side
10978/// PK/UNIQUE columns. Walks every other table whose FK references
10979/// `parent_table_name`; for each FK whose parent_columns overlap a
10980/// mutated column, decides the action by `fk.on_update`.
10981///
10982///   - RESTRICT / NoAction → error if any child references the OLD
10983///     value
10984///   - CASCADE → child FK columns get rewritten to the NEW parent
10985///     value (a SetNull-style update step with the new value)
10986///   - SetNull → child FK columns set to NULL
10987///   - SetDefault → child FK columns set to declared default
10988///
10989/// `plan_with_old` is `(row_position, old_values, new_values)` so
10990/// the planner can detect "did this row's parent key actually
10991/// change?" — only rows where at least one referenced parent
10992/// column moved trigger inbound work.
10993fn plan_fk_parent_updates(
10994    catalog: &Catalog,
10995    parent_table_name: &str,
10996    plan_with_old: &[(usize, Vec<Value>, Vec<Value>)],
10997) -> Result<Vec<FkChildStep>, EngineError> {
10998    use alloc::collections::BTreeMap;
10999    if plan_with_old.is_empty() {
11000        return Ok(Vec::new());
11001    }
11002    // For each child table we may touch, build per-child step
11003    // lists. UPDATE never deletes children — `delete_plan` stays
11004    // empty here but is kept structurally aligned with
11005    // `plan_fk_parent_deletions` for future use.
11006    let delete_plan: BTreeMap<String, alloc::collections::BTreeSet<usize>> = BTreeMap::new();
11007    let mut setnull_plan: BTreeMap<String, alloc::collections::BTreeSet<(usize, usize)>> =
11008        BTreeMap::new();
11009    let mut setdefault_plan: BTreeMap<String, BTreeMap<(usize, usize), Value>> = BTreeMap::new();
11010    // Cascade-update plan: child_table → row_idx → col_idx → new_value
11011    let mut cascade_plan: BTreeMap<String, BTreeMap<(usize, usize), Value>> = BTreeMap::new();
11012
11013    for child_name in catalog.table_names() {
11014        let child = catalog
11015            .get(&child_name)
11016            .expect("table_names → catalog.get total");
11017        for fk in &child.schema().foreign_keys {
11018            if fk.parent_table != parent_table_name {
11019                continue;
11020            }
11021            for (_pos, old_row, new_row) in plan_with_old {
11022                // Did any parent FK column change?
11023                let key_changed = fk
11024                    .parent_columns
11025                    .iter()
11026                    .any(|&pi| old_row.get(pi) != new_row.get(pi));
11027                if !key_changed {
11028                    continue;
11029                }
11030                // The OLD parent key — used to find referring children.
11031                let old_key: Vec<&Value> =
11032                    fk.parent_columns.iter().map(|&pi| &old_row[pi]).collect();
11033                if old_key.iter().any(|v| matches!(v, Value::Null)) {
11034                    // NULL parent has no children — skip.
11035                    continue;
11036                }
11037                let new_key: Vec<&Value> =
11038                    fk.parent_columns.iter().map(|&pi| &new_row[pi]).collect();
11039                for (child_row_idx, child_row) in child.rows().iter().enumerate() {
11040                    // Self-ref same-row updates: a row updating its
11041                    // own PK doesn't restrict itself.
11042                    if child_name == parent_table_name
11043                        && plan_with_old.iter().any(|(p, _, _)| *p == child_row_idx)
11044                    {
11045                        continue;
11046                    }
11047                    let matches_key = fk
11048                        .local_columns
11049                        .iter()
11050                        .enumerate()
11051                        .all(|(i, &li)| child_row.values.get(li) == Some(old_key[i]));
11052                    if !matches_key {
11053                        continue;
11054                    }
11055                    match fk.on_update {
11056                        spg_storage::FkAction::Restrict | spg_storage::FkAction::NoAction => {
11057                            return Err(EngineError::Unsupported(alloc::format!(
11058                                "FOREIGN KEY violation: UPDATE on {parent_table_name:?} PK is \
11059                                 restricted by FK from {child_name:?}.{:?}",
11060                                fk.local_columns,
11061                            )));
11062                        }
11063                        spg_storage::FkAction::Cascade => {
11064                            // Rewrite child FK columns to new key.
11065                            let entry = cascade_plan.entry(child_name.clone()).or_default();
11066                            for (i, &li) in fk.local_columns.iter().enumerate() {
11067                                entry.insert((child_row_idx, li), new_key[i].clone());
11068                            }
11069                        }
11070                        spg_storage::FkAction::SetNull => {
11071                            for &li in &fk.local_columns {
11072                                let col = child.schema().columns.get(li).ok_or_else(|| {
11073                                    EngineError::Unsupported(alloc::format!(
11074                                        "FK local column {li} missing in {child_name:?}"
11075                                    ))
11076                                })?;
11077                                if !col.nullable {
11078                                    return Err(EngineError::Unsupported(alloc::format!(
11079                                        "FOREIGN KEY ON UPDATE SET NULL: column \
11080                                         {child_name:?}.{:?} is NOT NULL",
11081                                        col.name,
11082                                    )));
11083                                }
11084                            }
11085                            let entry = setnull_plan.entry(child_name.clone()).or_default();
11086                            for &li in &fk.local_columns {
11087                                entry.insert((child_row_idx, li));
11088                            }
11089                        }
11090                        spg_storage::FkAction::SetDefault => {
11091                            let entry = setdefault_plan.entry(child_name.clone()).or_default();
11092                            for &li in &fk.local_columns {
11093                                let col = child.schema().columns.get(li).ok_or_else(|| {
11094                                    EngineError::Unsupported(alloc::format!(
11095                                        "FK local column {li} missing in {child_name:?}"
11096                                    ))
11097                                })?;
11098                                let default = col.default.clone().ok_or_else(|| {
11099                                    EngineError::Unsupported(alloc::format!(
11100                                        "FOREIGN KEY ON UPDATE SET DEFAULT: column \
11101                                         {child_name:?}.{:?} has no DEFAULT",
11102                                        col.name,
11103                                    ))
11104                                })?;
11105                                entry.insert((child_row_idx, li), default);
11106                            }
11107                        }
11108                    }
11109                }
11110            }
11111        }
11112    }
11113    // Flatten into FkChildStep list. UPDATE doesn't produce
11114    // DeleteSteps (CASCADE on UPDATE just rewrites FK values).
11115    let mut steps: Vec<FkChildStep> = Vec::new();
11116    for (child_table, entries) in cascade_plan {
11117        let mut positions = Vec::with_capacity(entries.len());
11118        let mut columns = Vec::with_capacity(entries.len());
11119        let mut defaults = Vec::with_capacity(entries.len());
11120        for ((p, c), v) in entries {
11121            positions.push(p);
11122            columns.push(c);
11123            defaults.push(v);
11124        }
11125        // We reuse `FkChildAction::SetDefault` for cascade-update:
11126        // both shapes are "write a known value into specific cells"
11127        // — `apply_per_cell_writes` doesn't care whether the value
11128        // came from a DEFAULT declaration or a new parent key.
11129        steps.push(FkChildStep {
11130            child_table,
11131            action: FkChildAction::SetDefault {
11132                positions,
11133                columns,
11134                defaults,
11135            },
11136        });
11137    }
11138    for (child_table, entries) in setnull_plan {
11139        let (positions, columns): (Vec<usize>, Vec<usize>) = entries.into_iter().unzip();
11140        steps.push(FkChildStep {
11141            child_table,
11142            action: FkChildAction::SetNull { positions, columns },
11143        });
11144    }
11145    for (child_table, entries) in setdefault_plan {
11146        let mut positions = Vec::with_capacity(entries.len());
11147        let mut columns = Vec::with_capacity(entries.len());
11148        let mut defaults = Vec::with_capacity(entries.len());
11149        for ((p, c), v) in entries {
11150            positions.push(p);
11151            columns.push(c);
11152            defaults.push(v);
11153        }
11154        steps.push(FkChildStep {
11155            child_table,
11156            action: FkChildAction::SetDefault {
11157                positions,
11158                columns,
11159                defaults,
11160            },
11161        });
11162    }
11163    let _ = delete_plan; // UPDATE never deletes children.
11164    Ok(steps)
11165}
11166
11167/// v7.6.5 — apply one FK child step to the catalog. Encapsulates
11168/// the three action variants so the DELETE executor stays a
11169/// simple loop over the planned steps.
11170fn apply_fk_child_step(catalog: &mut Catalog, step: &FkChildStep) -> Result<(), EngineError> {
11171    let child = catalog.get_mut(&step.child_table).ok_or_else(|| {
11172        EngineError::Storage(StorageError::TableNotFound {
11173            name: step.child_table.clone(),
11174        })
11175    })?;
11176    match &step.action {
11177        FkChildAction::Delete { positions } => {
11178            let _ = child.delete_rows(positions);
11179        }
11180        FkChildAction::SetNull { positions, columns } => {
11181            apply_per_cell_writes(child, positions, columns, |_| Value::Null)?;
11182        }
11183        FkChildAction::SetDefault {
11184            positions,
11185            columns,
11186            defaults,
11187        } => {
11188            apply_per_cell_writes(child, positions, columns, |i| defaults[i].clone())?;
11189        }
11190    }
11191    Ok(())
11192}
11193
11194/// v7.6.5 — write new values into selected child cells via
11195/// `Table::update_row` (the catalog's existing UPDATE entry).
11196/// Groups writes by row position so multi-column updates on the
11197/// same row only call `update_row` once. `value_for(i)` produces
11198/// the new value for the i-th (position, column) entry.
11199fn apply_per_cell_writes(
11200    child: &mut spg_storage::Table,
11201    positions: &[usize],
11202    columns: &[usize],
11203    mut value_for: impl FnMut(usize) -> Value,
11204) -> Result<(), EngineError> {
11205    use alloc::collections::BTreeMap;
11206    let mut by_row: BTreeMap<usize, Vec<(usize, Value)>> = BTreeMap::new();
11207    for i in 0..positions.len() {
11208        by_row
11209            .entry(positions[i])
11210            .or_default()
11211            .push((columns[i], value_for(i)));
11212    }
11213    for (pos, mutations) in by_row {
11214        let mut new_values = child.rows()[pos].values.clone();
11215        for (col, v) in mutations {
11216            if let Some(slot) = new_values.get_mut(col) {
11217                *slot = v;
11218            }
11219        }
11220        child
11221            .update_row(pos, new_values)
11222            .map_err(EngineError::Storage)?;
11223    }
11224    Ok(())
11225}
11226
11227fn fk_action_sql_to_storage(a: spg_sql::ast::FkAction) -> spg_storage::FkAction {
11228    match a {
11229        spg_sql::ast::FkAction::Restrict => spg_storage::FkAction::Restrict,
11230        spg_sql::ast::FkAction::Cascade => spg_storage::FkAction::Cascade,
11231        spg_sql::ast::FkAction::SetNull => spg_storage::FkAction::SetNull,
11232        spg_sql::ast::FkAction::SetDefault => spg_storage::FkAction::SetDefault,
11233        spg_sql::ast::FkAction::NoAction => spg_storage::FkAction::NoAction,
11234    }
11235}
11236
11237/// v7.9.21 — resolve a column's DEFAULT for INSERT-time
11238/// default-fill. Free fn (rather than `&self`) so callers
11239/// with an active `&mut Table` borrow can still use it.
11240/// Literal defaults take the cached path (`col.default`);
11241/// runtime defaults hit `clock_fn` at each call. mailrs G4.
11242fn resolve_column_default_free(
11243    col: &ColumnSchema,
11244    clock_fn: Option<ClockFn>,
11245) -> Result<Value, EngineError> {
11246    if let Some(rt) = &col.runtime_default {
11247        return eval_runtime_default_free(rt, col.ty, clock_fn);
11248    }
11249    Ok(col.default.clone().unwrap_or(Value::Null))
11250}
11251
11252fn eval_runtime_default_free(
11253    rt: &str,
11254    ty: DataType,
11255    clock_fn: Option<ClockFn>,
11256) -> Result<Value, EngineError> {
11257    let s = rt.trim().to_ascii_lowercase();
11258    let canonical = s.trim_end_matches("()");
11259    let now_us = match clock_fn {
11260        Some(f) => f(),
11261        None => 0,
11262    };
11263    let v = match canonical {
11264        "now" | "current_timestamp" | "localtimestamp" => Value::Timestamp(now_us),
11265        "current_date" => Value::Date((now_us / 86_400_000_000) as i32),
11266        "current_time" | "localtime" => Value::Timestamp(now_us),
11267        other => {
11268            return Err(EngineError::Unsupported(alloc::format!(
11269                "runtime DEFAULT expression {other:?} not supported \
11270                 (v7.9.21 whitelist: now() / current_timestamp / \
11271                 current_date / current_time / localtimestamp / \
11272                 localtime)"
11273            )));
11274        }
11275    };
11276    coerce_value(v, ty, "DEFAULT", 0)
11277}
11278
11279/// v7.9.21 — true when a DEFAULT expression needs INSERT-time
11280/// evaluation rather than being cacheable as a literal Value.
11281/// FunctionCall is the immediate case (`now()`,
11282/// `current_timestamp`). Literal expressions and simple sign-
11283/// flipped numerics still take the static-cache path.
11284fn is_runtime_default_expr(expr: &Expr) -> bool {
11285    match expr {
11286        Expr::FunctionCall { .. } => true,
11287        Expr::Unary { expr, .. } => is_runtime_default_expr(expr),
11288        _ => false,
11289    }
11290}
11291
11292fn column_def_to_schema(c: ColumnDef) -> Result<ColumnSchema, EngineError> {
11293    let ty = column_type_to_data_type(c.ty);
11294    let mut schema = ColumnSchema::new(c.name.clone(), ty, c.nullable);
11295    if let Some(default_expr) = c.default {
11296        // v7.9.21 — distinguish literal defaults (evaluated once
11297        // at CREATE TABLE) from expression defaults (deferred to
11298        // INSERT). Function calls (`now()`, `current_timestamp`
11299        // — see v7.9.20 keyword promotion) take the runtime path.
11300        // Literals continue to cache. mailrs G4.
11301        if is_runtime_default_expr(&default_expr) {
11302            let display = alloc::format!("{default_expr}");
11303            schema = schema.with_runtime_default(display);
11304        } else {
11305            let raw = literal_expr_to_value(default_expr)?;
11306            let coerced = coerce_value(raw, ty, &c.name, 0)?;
11307            schema = schema.with_default(coerced);
11308        }
11309    }
11310    if c.auto_increment {
11311        // AUTO_INCREMENT only makes sense on integer-shaped columns.
11312        if !matches!(ty, DataType::SmallInt | DataType::Int | DataType::BigInt) {
11313            return Err(EngineError::Unsupported(alloc::format!(
11314                "AUTO_INCREMENT requires an integer column type, got {ty:?}"
11315            )));
11316        }
11317        schema = schema.with_auto_increment();
11318    }
11319    Ok(schema)
11320}
11321
11322/// v7.10.4 — decode a BYTEA literal. Accepts:
11323///   * `\xDEADBEEF` (case-insensitive hex; whitespace stripped)
11324///   * `Hello\000world` (backslash escape form; `\\` for literal backslash)
11325///   * Anything else → raw UTF-8 bytes of the input (PG accepts this too).
11326fn decode_bytea_literal(s: &str) -> Result<alloc::vec::Vec<u8>, &'static str> {
11327    let s = s.trim();
11328    if let Some(hex) = s.strip_prefix("\\x").or_else(|| s.strip_prefix("\\X")) {
11329        // Hex form. Each pair of hex digits → one byte.
11330        let cleaned: alloc::string::String = hex.chars().filter(|c| !c.is_whitespace()).collect();
11331        if cleaned.len() % 2 != 0 {
11332            return Err("odd-length hex literal");
11333        }
11334        let mut out = alloc::vec::Vec::with_capacity(cleaned.len() / 2);
11335        let cleaned_bytes = cleaned.as_bytes();
11336        for i in (0..cleaned_bytes.len()).step_by(2) {
11337            let hi = hex_nibble(cleaned_bytes[i])?;
11338            let lo = hex_nibble(cleaned_bytes[i + 1])?;
11339            out.push((hi << 4) | lo);
11340        }
11341        return Ok(out);
11342    }
11343    // Escape form or raw. Walk char-by-char; `\\` and `\NNN` octal
11344    // sequences decode; anything else is a literal byte.
11345    let bytes = s.as_bytes();
11346    let mut out = alloc::vec::Vec::with_capacity(bytes.len());
11347    let mut i = 0;
11348    while i < bytes.len() {
11349        let b = bytes[i];
11350        if b == b'\\' && i + 1 < bytes.len() {
11351            let n = bytes[i + 1];
11352            if n == b'\\' {
11353                out.push(b'\\');
11354                i += 2;
11355                continue;
11356            }
11357            if n.is_ascii_digit()
11358                && i + 3 < bytes.len()
11359                && bytes[i + 2].is_ascii_digit()
11360                && bytes[i + 3].is_ascii_digit()
11361            {
11362                let oct = |x: u8| (x - b'0') as u32;
11363                let v = oct(n) * 64 + oct(bytes[i + 2]) * 8 + oct(bytes[i + 3]);
11364                if v <= 0xFF {
11365                    out.push(v as u8);
11366                    i += 4;
11367                    continue;
11368                }
11369            }
11370        }
11371        out.push(b);
11372        i += 1;
11373    }
11374    Ok(out)
11375}
11376
/// Convert one ASCII hex digit (`0-9`, `a-f`, `A-F`) into its 4-bit
/// value. Any other byte yields `Err("invalid hex digit")`.
fn hex_nibble(b: u8) -> Result<u8, &'static str> {
    // Lower-casing folds `A-F` onto `a-f`; all other bytes are unchanged.
    match b.to_ascii_lowercase() {
        digit @ b'0'..=b'9' => Ok(digit - b'0'),
        letter @ b'a'..=b'f' => Ok(letter - b'a' + 10),
        _ => Err("invalid hex digit"),
    }
}
11385
// v7.10.11 — `decode_text_array_literal` (defined further below)
// decodes a PG TEXT[] external array form (`{a,b,NULL}` with
// optional double-quoted elements). It requires a leading `{` and a
// trailing `}` and splits the interior at commas. Quoted elements
// (`"hello, world"`) preserve embedded commas; `\\` and `\"` decode
// to a literal backslash / quote. A plain unquoted `NULL`
// (case-insensitive) maps to `None`.
//
11392/// v7.11.13 — pick the array type for `ARRAY[lit, …]` from the
11393/// element values. Single-element-type rules:
11394///   - all NULL / all Text → TextArray
11395///   - all Int (or Int+NULL) → IntArray
11396///   - any BigInt without Text → BigIntArray (widening)
11397///   - any Text → TextArray (fallback; non-string elements
11398///     render as text)
11399fn array_literal_widen(items: alloc::vec::Vec<Value>) -> Value {
11400    let mut has_text = false;
11401    let mut has_bigint = false;
11402    let mut has_int = false;
11403    for v in &items {
11404        match v {
11405            Value::Null => {}
11406            Value::Text(_) | Value::Json(_) => has_text = true,
11407            Value::BigInt(_) => has_bigint = true,
11408            Value::Int(_) | Value::SmallInt(_) => has_int = true,
11409            _ => has_text = true,
11410        }
11411    }
11412    if has_text || (!has_bigint && !has_int) {
11413        let out: alloc::vec::Vec<Option<alloc::string::String>> = items
11414            .into_iter()
11415            .map(|v| match v {
11416                Value::Null => None,
11417                Value::Text(s) | Value::Json(s) => Some(s),
11418                other => Some(alloc::format!("{other:?}")),
11419            })
11420            .collect();
11421        return Value::TextArray(out);
11422    }
11423    if has_bigint {
11424        let out: alloc::vec::Vec<Option<i64>> = items
11425            .into_iter()
11426            .map(|v| match v {
11427                Value::Null => None,
11428                Value::Int(n) => Some(i64::from(n)),
11429                Value::SmallInt(n) => Some(i64::from(n)),
11430                Value::BigInt(n) => Some(n),
11431                _ => unreachable!("widen: unexpected non-integer in BigInt path"),
11432            })
11433            .collect();
11434        return Value::BigIntArray(out);
11435    }
11436    let out: alloc::vec::Vec<Option<i32>> = items
11437        .into_iter()
11438        .map(|v| match v {
11439            Value::Null => None,
11440            Value::Int(n) => Some(n),
11441            Value::SmallInt(n) => Some(i32::from(n)),
11442            _ => unreachable!("widen: unexpected non-i32-compatible in Int path"),
11443        })
11444        .collect();
11445    Value::IntArray(out)
11446}
11447
/// v7.10.11 — decode PG's external TEXT[] form (`{a,b,NULL}`, with
/// optional double-quoted elements) into a vector of optional strings.
///
/// The trimmed input must be wrapped in `{` … `}`; the interior is
/// split at commas. Inside a quoted element (`"hello, world"`) commas
/// are preserved and a backslash makes the following byte literal, so
/// `\\` and `\"` decode to a backslash / quote. An unquoted element is
/// trimmed, and a bare `NULL` (case-insensitive) maps to `None`. An
/// interior that is empty or only whitespace yields an empty vector.
///
/// # Errors
///
/// Returns a static message when the braces are missing, a quoted
/// element is never closed, or something other than `,` follows a
/// quoted element.
fn decode_text_array_literal(
    s: &str,
) -> Result<alloc::vec::Vec<Option<alloc::string::String>>, &'static str> {
    let trimmed = s.trim();
    let inner = trimmed
        .strip_prefix('{')
        .and_then(|x| x.strip_suffix('}'))
        .ok_or("TEXT[] literal must be enclosed in '{...}'")?;
    let mut out: alloc::vec::Vec<Option<alloc::string::String>> = alloc::vec::Vec::new();
    if inner.trim().is_empty() {
        return Ok(out);
    }
    let bytes = inner.as_bytes();
    let mut i = 0;
    while i <= bytes.len() {
        // Skip leading whitespace.
        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
            i += 1;
        }
        // Quoted element.
        if i < bytes.len() && bytes[i] == b'"' {
            i += 1; // open quote
            // Collect raw bytes and convert once at the end: pushing each
            // byte `as char` would split multi-byte UTF-8 sequences into
            // separate Latin-1 characters and corrupt non-ASCII text.
            let mut buf: alloc::vec::Vec<u8> = alloc::vec::Vec::new();
            while i < bytes.len() && bytes[i] != b'"' {
                if bytes[i] == b'\\' && i + 1 < bytes.len() {
                    buf.push(bytes[i + 1]);
                    i += 2;
                } else {
                    buf.push(bytes[i]);
                    i += 1;
                }
            }
            if i >= bytes.len() {
                return Err("unterminated quoted element");
            }
            i += 1; // close quote
            // Only ASCII bytes (`\`, `"`) are ever dropped from the valid
            // UTF-8 input, so this conversion is not expected to fail.
            let text = alloc::string::String::from_utf8(buf)
                .map_err(|_| "invalid UTF-8 in quoted TEXT[] element")?;
            out.push(Some(text));
        } else {
            // Unquoted element — read until next comma or end.
            let start = i;
            while i < bytes.len() && bytes[i] != b',' {
                i += 1;
            }
            let raw = inner[start..i].trim();
            if raw.eq_ignore_ascii_case("NULL") {
                out.push(None);
            } else {
                out.push(Some(alloc::string::ToString::to_string(raw)));
            }
        }
        // Skip whitespace, expect comma or end.
        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
            i += 1;
        }
        if i >= bytes.len() {
            break;
        }
        if bytes[i] != b',' {
            return Err("expected ',' between TEXT[] elements");
        }
        i += 1;
    }
    Ok(out)
}
11512
/// v7.10.11 — encode a TEXT[] back into the PG external array
/// form. NULL elements become the bare token `NULL`. An element is
/// double-quoted (with `\\` / `\"` escapes) when it is empty, spells
/// `NULL` in any letter case, or contains a comma, brace, quote,
/// backslash, or any whitespace character.
///
/// Every whitespace character forces quoting — not only space and
/// tab — because an unquoted element is whitespace-trimmed when the
/// literal is decoded again, so e.g. a trailing newline would
/// otherwise be lost on a round-trip.
fn encode_text_array(items: &[Option<alloc::string::String>]) -> alloc::string::String {
    let mut out = alloc::string::String::with_capacity(2 + items.len() * 8);
    out.push('{');
    for (i, item) in items.iter().enumerate() {
        if i > 0 {
            out.push(',');
        }
        match item {
            None => out.push_str("NULL"),
            Some(s) => {
                let needs_quote = s.is_empty()
                    || s.eq_ignore_ascii_case("NULL")
                    || s.chars()
                        .any(|c| matches!(c, ',' | '{' | '}' | '"' | '\\') || c.is_whitespace());
                if needs_quote {
                    out.push('"');
                    for c in s.chars() {
                        if c == '"' || c == '\\' {
                            out.push('\\');
                        }
                        out.push(c);
                    }
                    out.push('"');
                } else {
                    out.push_str(s);
                }
            }
        }
    }
    out.push('}');
    out
}
11549
11550/// v7.10.4 — encode BYTEA bytes in PG hex output format
11551/// (`\x` prefix, lowercase hex pairs). Used by Text-side
11552/// round-trip + the wire layer's text-mode encoder.
11553fn encode_bytea_hex(b: &[u8]) -> alloc::string::String {
11554    let mut out = alloc::string::String::with_capacity(2 + 2 * b.len());
11555    out.push_str("\\x");
11556    for byte in b {
11557        let hi = byte >> 4;
11558        let lo = byte & 0x0F;
11559        out.push(hex_digit(hi));
11560        out.push(hex_digit(lo));
11561    }
11562    out
11563}
11564
/// Map a 4-bit value (`0..=15`) to its lowercase hex digit. Values
/// outside that range produce `'?'`.
const fn hex_digit(n: u8) -> char {
    if n < 10 {
        (b'0' + n) as char
    } else if n < 16 {
        (b'a' + (n - 10)) as char
    } else {
        '?'
    }
}
11572
11573const fn column_type_to_data_type(t: ColumnTypeName) -> DataType {
11574    match t {
11575        ColumnTypeName::SmallInt => DataType::SmallInt,
11576        ColumnTypeName::Int => DataType::Int,
11577        ColumnTypeName::BigInt => DataType::BigInt,
11578        ColumnTypeName::Float => DataType::Float,
11579        ColumnTypeName::Text => DataType::Text,
11580        ColumnTypeName::Varchar(n) => DataType::Varchar(n),
11581        ColumnTypeName::Char(n) => DataType::Char(n),
11582        ColumnTypeName::Bool => DataType::Bool,
11583        ColumnTypeName::Vector { dim, encoding } => DataType::Vector {
11584            dim,
11585            encoding: match encoding {
11586                SqlVecEncoding::F32 => VecEncoding::F32,
11587                SqlVecEncoding::Sq8 => VecEncoding::Sq8,
11588                SqlVecEncoding::F16 => VecEncoding::F16,
11589            },
11590        },
11591        ColumnTypeName::Numeric(precision, scale) => DataType::Numeric { precision, scale },
11592        ColumnTypeName::Date => DataType::Date,
11593        ColumnTypeName::Timestamp => DataType::Timestamp,
11594        ColumnTypeName::Timestamptz => DataType::Timestamptz,
11595        ColumnTypeName::Json => DataType::Json,
11596        ColumnTypeName::Jsonb => DataType::Jsonb,
11597        ColumnTypeName::Bytes => DataType::Bytes,
11598        ColumnTypeName::TextArray => DataType::TextArray,
11599        ColumnTypeName::IntArray => DataType::IntArray,
11600        ColumnTypeName::BigIntArray => DataType::BigIntArray,
11601        ColumnTypeName::TsVector => DataType::TsVector,
11602        ColumnTypeName::TsQuery => DataType::TsQuery,
11603    }
11604}
11605
11606/// Convert an INSERT VALUES expression to a storage Value. Supports literal
11607/// expressions, unary-minus over numeric literals, and pgvector-style
11608/// `'[..]'::vector` cast (v1.2). Anything more complex returns `Unsupported`.
11609fn literal_expr_to_value(expr: Expr) -> Result<Value, EngineError> {
11610    match expr {
11611        Expr::Literal(l) => Ok(literal_to_value(l)),
11612        Expr::Cast { expr, target } => {
11613            let inner_value = literal_expr_to_value(*expr)?;
11614            crate::eval::cast_value(inner_value, target).map_err(EngineError::Eval)
11615        }
11616        Expr::Unary {
11617            op: UnOp::Neg,
11618            expr,
11619        } => match *expr {
11620            Expr::Literal(Literal::Integer(n)) => {
11621                // Fold to i32 if it fits, else BigInt. Parser emits Integer(i64)
11622                // — overflow on negate of i64::MIN is the one edge case.
11623                let neg = n.checked_neg().ok_or_else(|| {
11624                    EngineError::Unsupported("integer literal overflow on negation".into())
11625                })?;
11626                Ok(int_value_for(neg))
11627            }
11628            Expr::Literal(Literal::Float(x)) => Ok(Value::Float(-x)),
11629            other => Err(EngineError::Unsupported(alloc::format!(
11630                "unary minus over non-literal expression: {other:?}"
11631            ))),
11632        },
11633        // v7.10.10 — `ARRAY[lit, lit, …]` constructor accepted at
11634        // INSERT-time. Each element must reduce to a Value through
11635        // `literal_expr_to_value`; NULL elements become `None`.
11636        // v7.11.13 — deduce shape from element values: all Int →
11637        // IntArray; any BigInt → BigIntArray (widening); any Text
11638        // → TextArray. Cast targets (`ARRAY[]::INT[]`) flow through
11639        // the outer Cast arm before reaching here and re-coerce.
11640        Expr::Array(items) => {
11641            let mut materialised: alloc::vec::Vec<Value> =
11642                alloc::vec::Vec::with_capacity(items.len());
11643            for elem in items {
11644                materialised.push(literal_expr_to_value(elem)?);
11645            }
11646            Ok(array_literal_widen(materialised))
11647        }
11648        other => Err(EngineError::Unsupported(alloc::format!(
11649            "non-literal INSERT value expression: {other:?}"
11650        ))),
11651    }
11652}
11653
11654fn literal_to_value(l: Literal) -> Value {
11655    match l {
11656        Literal::Integer(n) => int_value_for(n),
11657        Literal::Float(x) => Value::Float(x),
11658        Literal::String(s) => Value::Text(s),
11659        Literal::Bool(b) => Value::Bool(b),
11660        Literal::Null => Value::Null,
11661        Literal::Vector(v) => Value::Vector(v),
11662        Literal::Interval { months, micros, .. } => Value::Interval { months, micros },
11663    }
11664}
11665
11666/// Pick `Int` (`i32`) when the literal fits, else `BigInt`. `INT` vs `BIGINT`
11667/// columns will still enforce the right tag downstream — this is just the
11668/// default we synthesise from an unannotated integer literal.
11669fn int_value_for(n: i64) -> Value {
11670    if let Ok(small) = i32::try_from(n) {
11671        Value::Int(small)
11672    } else {
11673        Value::BigInt(n)
11674    }
11675}
11676
11677/// Widen / narrow `v` to fit `expected`. Numerics permit safe widening
11678/// (`Int → BigInt`, `Int/BigInt → Float`) and best-effort narrowing
11679/// (`BigInt → Int` succeeds only when the value fits in `i32`). Everything
11680/// else returns `TypeMismatch` carrying the column name for caller diagnostics.
11681/// `NULL` is always permitted; the nullability check happens later in storage.
11682#[allow(clippy::too_many_lines)]
11683fn coerce_value(
11684    v: Value,
11685    expected: DataType,
11686    col_name: &str,
11687    position: usize,
11688) -> Result<Value, EngineError> {
11689    if v.is_null() {
11690        return Ok(Value::Null);
11691    }
11692    let actual = v.data_type().expect("non-null");
11693    if actual == expected {
11694        return Ok(v);
11695    }
11696    let coerced = match (v, expected) {
11697        (Value::Int(n), DataType::BigInt) => Some(Value::BigInt(i64::from(n))),
11698        (Value::Int(n), DataType::Float) => Some(Value::Float(f64::from(n))),
11699        (Value::Int(n), DataType::SmallInt) => i16::try_from(n).ok().map(Value::SmallInt),
11700        (Value::Int(n), DataType::Numeric { precision, scale }) => Some(numeric_from_integer(
11701            i128::from(n),
11702            precision,
11703            scale,
11704            col_name,
11705        )?),
11706        (Value::SmallInt(n), DataType::Int) => Some(Value::Int(i32::from(n))),
11707        (Value::SmallInt(n), DataType::BigInt) => Some(Value::BigInt(i64::from(n))),
11708        (Value::SmallInt(n), DataType::Float) => Some(Value::Float(f64::from(n))),
11709        (Value::SmallInt(n), DataType::Numeric { precision, scale }) => Some(numeric_from_integer(
11710            i128::from(n),
11711            precision,
11712            scale,
11713            col_name,
11714        )?),
11715        (Value::BigInt(n), DataType::Int) => i32::try_from(n).ok().map(Value::Int),
11716        (Value::BigInt(n), DataType::SmallInt) => i16::try_from(n).ok().map(Value::SmallInt),
11717        #[allow(clippy::cast_precision_loss)]
11718        (Value::BigInt(n), DataType::Float) => Some(Value::Float(n as f64)),
11719        (Value::BigInt(n), DataType::Numeric { precision, scale }) => Some(numeric_from_integer(
11720            i128::from(n),
11721            precision,
11722            scale,
11723            col_name,
11724        )?),
11725        (Value::Float(x), DataType::Numeric { precision, scale }) => {
11726            Some(numeric_from_float(x, precision, scale, col_name)?)
11727        }
11728        // Text → DATE / TIMESTAMP: parse canonical text forms.
11729        (Value::Text(s), DataType::Date) => {
11730            let d = eval::parse_date_literal(&s).ok_or_else(|| {
11731                EngineError::Eval(EvalError::TypeMismatch {
11732                    detail: alloc::format!("cannot parse {s:?} as DATE for column `{col_name}`"),
11733                })
11734            })?;
11735            Some(Value::Date(d))
11736        }
11737        // v7.14.0 — MySQL DEFAULT clauses quote integer / float
11738        // / boolean literals (`DEFAULT '0'`, `DEFAULT '1'`,
11739        // `DEFAULT '3.14'`, `DEFAULT 'true'`). Coerce the text
11740        // form to the column's numeric / bool type at DEFAULT-
11741        // installation time so the storage check sees a typed
11742        // value. Parse failures fall through to TypeMismatch.
11743        (Value::Text(s), DataType::SmallInt) => s.parse::<i16>().ok().map(Value::SmallInt),
11744        (Value::Text(s), DataType::Int) => s.parse::<i32>().ok().map(Value::Int),
11745        (Value::Text(s), DataType::BigInt) => s.parse::<i64>().ok().map(Value::BigInt),
11746        (Value::Text(s), DataType::Float) => s.parse::<f64>().ok().map(Value::Float),
11747        (Value::Text(s), DataType::Bool) => match s.to_ascii_lowercase().as_str() {
11748            "0" | "false" | "f" | "no" | "off" => Some(Value::Bool(false)),
11749            "1" | "true" | "t" | "yes" | "on" => Some(Value::Bool(true)),
11750            _ => None,
11751        },
11752        // v4.9: Text ↔ JSON coercion. No structural validation —
11753        // any text literal is accepted; the responsibility for
11754        // valid JSON lies with the producer.
11755        (Value::Text(s), DataType::Json | DataType::Jsonb) => Some(Value::Json(s)),
11756        (Value::Json(s), DataType::Text) => Some(Value::Text(s)),
11757        // v7.13.3 — mailrs round-7 S10. SPG's storage represents
11758        // both JSON and JSONB on-disk as `Value::Json(String)` —
11759        // they share the underlying text payload. The cast
11760        // `'<text>'::jsonb` produces a Value::Json that needs to
11761        // satisfy a DataType::Jsonb column. Identity coerce in
11762        // both directions so JSON ↔ JSONB assignments work at all
11763        // INSERT / ALTER COLUMN TYPE / DEFAULT contexts.
11764        (Value::Json(s), DataType::Jsonb | DataType::Json) => Some(Value::Json(s)),
11765        // v7.10.4 — Text → BYTEA. Decode PG-style literal forms:
11766        //   - Hex:    `\x48656c6c6f`  (case-insensitive hex pairs)
11767        //   - Escape: `Hello\\000world`  (backslash + octal triples)
11768        //   - Plain:  any string → raw UTF-8 bytes (PG also accepts)
11769        // Errors surface as TypeMismatch so the operator gets a
11770        // clear "this literal isn't a bytea literal" hint.
11771        (Value::Text(s), DataType::Bytes) => {
11772            let bytes = decode_bytea_literal(&s).map_err(|e| {
11773                EngineError::Eval(EvalError::TypeMismatch {
11774                    detail: alloc::format!(
11775                        "cannot parse {s:?} as BYTEA for column `{col_name}`: {e}"
11776                    ),
11777                })
11778            })?;
11779            Some(Value::Bytes(bytes))
11780        }
11781        // v7.10.4 — BYTEA → Text round-trip uses the PG hex
11782        // output (lowercase, `\x` prefix). Important when a
11783        // SELECT pulls a bytea cell through a Text column path.
11784        (Value::Bytes(b), DataType::Text) => Some(Value::Text(encode_bytea_hex(&b))),
11785        // v7.10.11 — Text → TEXT[]. Decode PG's external array
11786        // form `'{a,b,NULL}'`. NULL element token (case-insensitive)
11787        // is the literal `NULL`; everything else is a quoted or
11788        // unquoted text element. mailrs `'{label1,label2}'::TEXT[]`.
11789        (Value::Text(s), DataType::TextArray) => {
11790            let arr = decode_text_array_literal(&s).map_err(|e| {
11791                EngineError::Eval(EvalError::TypeMismatch {
11792                    detail: alloc::format!(
11793                        "cannot parse {s:?} as TEXT[] for column `{col_name}`: {e}"
11794                    ),
11795                })
11796            })?;
11797            Some(Value::TextArray(arr))
11798        }
11799        // v7.16.0 — Text → IntArray / BigIntArray for the
11800        // spg-sqlx Bind path. Decode the PG external form
11801        // `{1,2,3}` as a TEXT array first, then parse each
11802        // element as int. Same shape as the TextArray decode
11803        // above with an element-wise narrow.
11804        (Value::Text(s), DataType::IntArray) => {
11805            let arr = decode_text_array_literal(&s).map_err(|e| {
11806                EngineError::Eval(EvalError::TypeMismatch {
11807                    detail: alloc::format!(
11808                        "cannot parse {s:?} as INT[] for column `{col_name}`: {e}"
11809                    ),
11810                })
11811            })?;
11812            let mut out: Vec<Option<i32>> = Vec::with_capacity(arr.len());
11813            for elem in arr {
11814                match elem {
11815                    None => out.push(None),
11816                    Some(t) => {
11817                        let n: i32 = t.parse().map_err(|_| {
11818                            EngineError::Eval(EvalError::TypeMismatch {
11819                                detail: alloc::format!(
11820                                    "cannot parse {t:?} as INT element for `{col_name}`"
11821                                ),
11822                            })
11823                        })?;
11824                        out.push(Some(n));
11825                    }
11826                }
11827            }
11828            Some(Value::IntArray(out))
11829        }
11830        (Value::Text(s), DataType::BigIntArray) => {
11831            let arr = decode_text_array_literal(&s).map_err(|e| {
11832                EngineError::Eval(EvalError::TypeMismatch {
11833                    detail: alloc::format!(
11834                        "cannot parse {s:?} as BIGINT[] for column `{col_name}`: {e}"
11835                    ),
11836                })
11837            })?;
11838            let mut out: Vec<Option<i64>> = Vec::with_capacity(arr.len());
11839            for elem in arr {
11840                match elem {
11841                    None => out.push(None),
11842                    Some(t) => {
11843                        let n: i64 = t.parse().map_err(|_| {
11844                            EngineError::Eval(EvalError::TypeMismatch {
11845                                detail: alloc::format!(
11846                                    "cannot parse {t:?} as BIGINT element for `{col_name}`"
11847                                ),
11848                            })
11849                        })?;
11850                        out.push(Some(n));
11851                    }
11852                }
11853            }
11854            Some(Value::BigIntArray(out))
11855        }
11856        // v7.10.11 — TEXT[] → Text round-trip uses PG's
11857        // external array form (`{a,b,NULL}`). Lets a SELECT
11858        // pull an array column through any Text-side codepath.
11859        (Value::TextArray(items), DataType::Text) => Some(Value::Text(encode_text_array(&items))),
11860        (Value::Text(s), DataType::Timestamp | DataType::Timestamptz) => {
11861            let t = eval::parse_timestamp_literal(&s).ok_or_else(|| {
11862                EngineError::Eval(EvalError::TypeMismatch {
11863                    detail: alloc::format!(
11864                        "cannot parse {s:?} as TIMESTAMP for column `{col_name}`"
11865                    ),
11866                })
11867            })?;
11868            Some(Value::Timestamp(t))
11869        }
11870        // DATE ↔ TIMESTAMP convertibility (DATE → midnight,
11871        // TIMESTAMP → day truncation).
11872        (Value::Date(d), DataType::Timestamp | DataType::Timestamptz) => {
11873            Some(Value::Timestamp(i64::from(d) * 86_400_000_000))
11874        }
11875        // v7.9.21 — Value::Timestamp lands in either Timestamp
11876        // or Timestamptz columns; the on-disk layout is the
11877        // same i64 microseconds UTC.
11878        (Value::Timestamp(t), DataType::Timestamptz) => Some(Value::Timestamp(t)),
11879        (Value::Timestamp(t), DataType::Date) => {
11880            let days = t.div_euclid(86_400_000_000);
11881            i32::try_from(days).ok().map(Value::Date)
11882        }
11883        (
11884            Value::Numeric {
11885                scaled,
11886                scale: src_scale,
11887            },
11888            DataType::Numeric { precision, scale },
11889        ) => Some(numeric_rescale(
11890            scaled, src_scale, precision, scale, col_name,
11891        )?),
11892        #[allow(clippy::cast_precision_loss)]
11893        (Value::Numeric { scaled, scale }, DataType::Float) => {
11894            let mut div = 1.0_f64;
11895            for _ in 0..scale {
11896                div *= 10.0;
11897            }
11898            Some(Value::Float((scaled as f64) / div))
11899        }
11900        (Value::Numeric { scaled, scale }, DataType::Int) => {
11901            let truncated = numeric_truncate_to_integer(scaled, scale);
11902            i32::try_from(truncated).ok().map(Value::Int)
11903        }
11904        (Value::Numeric { scaled, scale }, DataType::BigInt) => {
11905            let truncated = numeric_truncate_to_integer(scaled, scale);
11906            i64::try_from(truncated).ok().map(Value::BigInt)
11907        }
11908        (Value::Numeric { scaled, scale }, DataType::SmallInt) => {
11909            let truncated = numeric_truncate_to_integer(scaled, scale);
11910            i16::try_from(truncated).ok().map(Value::SmallInt)
11911        }
11912        // VARCHAR(n) enforces an upper bound on character count.
11913        (Value::Text(s), DataType::Varchar(max)) => {
11914            if u32::try_from(s.chars().count()).unwrap_or(u32::MAX) <= max {
11915                Some(Value::Text(s))
11916            } else {
11917                return Err(EngineError::Unsupported(alloc::format!(
11918                    "value for VARCHAR({max}) column `{col_name}` exceeds length: \
11919                     {} chars",
11920                    s.chars().count()
11921                )));
11922            }
11923        }
11924        // v6.0.1: f32 → SQ8 INSERT-time quantisation. Triggered
11925        // when the column declares `VECTOR(N) USING SQ8` and
11926        // the INSERT VALUES expression yields a raw f32 vector
11927        // (the normal pgvector-shape literal). Dim mismatch
11928        // falls through the `_ => None` arm and surfaces as
11929        // `TypeMismatch` with the expected SQ8 column type —
11930        // matching the F32 path's existing error.
11931        (
11932            Value::Vector(v),
11933            DataType::Vector {
11934                dim,
11935                encoding: VecEncoding::Sq8,
11936            },
11937        ) if v.len() == dim as usize => Some(Value::Sq8Vector(spg_storage::quantize::quantize(&v))),
11938        // v6.0.3: f32 → f16 INSERT-time conversion for HALF
11939        // columns. Bit-exact at the storage layer (modulo
11940        // half-precision rounding); no rerank pass needed at
11941        // search time.
11942        (
11943            Value::Vector(v),
11944            DataType::Vector {
11945                dim,
11946                encoding: VecEncoding::F16,
11947            },
11948        ) if v.len() == dim as usize => Some(Value::HalfVector(
11949            spg_storage::halfvec::HalfVector::from_f32_slice(&v),
11950        )),
11951        // CHAR(n) right-pads with U+0020 to exactly n chars; if the input
11952        // is already longer we reject (PG truncates trailing-space-only;
11953        // staying strict for v1).
11954        (Value::Text(s), DataType::Char(size)) => {
11955            let len = u32::try_from(s.chars().count()).unwrap_or(u32::MAX);
11956            if len > size {
11957                return Err(EngineError::Unsupported(alloc::format!(
11958                    "value for CHAR({size}) column `{col_name}` exceeds length: \
11959                     {len} chars"
11960                )));
11961            }
11962            let need = (size - len) as usize;
11963            let mut padded = s;
11964            padded.reserve(need);
11965            for _ in 0..need {
11966                padded.push(' ');
11967            }
11968            Some(Value::Text(padded))
11969        }
11970        _ => None,
11971    };
11972    coerced.ok_or(EngineError::Storage(StorageError::TypeMismatch {
11973        column: col_name.into(),
11974        expected,
11975        actual,
11976        position,
11977    }))
11978}
11979
11980/// v7.12.4 — render a function arg list into the
11981/// canonical form the storage layer caches as
11982/// [`spg_storage::FunctionDef::args_repr`]. The catalogue uses
11983/// this string for both display + as a coarse signature key
11984/// for the (deferred) overload resolution v7.12.5+ adds.
11985fn render_function_args(args: &[spg_sql::ast::FunctionArg]) -> alloc::string::String {
11986    use core::fmt::Write;
11987    let mut out = alloc::string::String::from("(");
11988    for (i, a) in args.iter().enumerate() {
11989        if i > 0 {
11990            out.push_str(", ");
11991        }
11992        match a.mode {
11993            spg_sql::ast::FunctionArgMode::In => {}
11994            spg_sql::ast::FunctionArgMode::Out => out.push_str("OUT "),
11995            spg_sql::ast::FunctionArgMode::InOut => out.push_str("INOUT "),
11996        }
11997        if let Some(n) = &a.name {
11998            out.push_str(n);
11999            out.push(' ');
12000        }
12001        match &a.ty {
12002            spg_sql::ast::FunctionArgType::Typed(t) => {
12003                let _ = write!(out, "{t}");
12004            }
12005            spg_sql::ast::FunctionArgType::Raw(s) => out.push_str(s),
12006        }
12007    }
12008    out.push(')');
12009    out
12010}
12011
12012#[cfg(test)]
12013mod tests {
12014    use super::*;
12015    use alloc::vec;
12016
12017    fn unwrap_command_ok(r: &QueryResult) -> usize {
12018        match r {
12019            QueryResult::CommandOk { affected, .. } => *affected,
12020            QueryResult::Rows { .. } => panic!("expected CommandOk, got Rows"),
12021        }
12022    }
12023
#[test]
fn create_table_registers_schema() {
    // A two-column CREATE TABLE must show up in the catalog with the
    // declared column types and nullability.
    let mut engine = Engine::new();
    engine
        .execute("CREATE TABLE foo (a INT NOT NULL, b TEXT)")
        .unwrap();
    assert_eq!(engine.catalog().table_count(), 1);

    let table = engine.catalog().get("foo").unwrap();
    let schema = table.schema();
    assert_eq!(schema.columns.len(), 2);
    assert_eq!(schema.columns[0].ty, DataType::Int);
    assert!(!schema.columns[0].nullable);
    assert_eq!(schema.columns[1].ty, DataType::Text);
}
12036
#[test]
fn create_table_vector_default_is_f32_encoded() {
    // A VECTOR(N) column without a USING clause is stored as F32.
    let expected = DataType::Vector {
        dim: 8,
        encoding: VecEncoding::F32,
    };
    let mut engine = Engine::new();
    engine.execute("CREATE TABLE t (v VECTOR(8))").unwrap();
    let table = engine.catalog().get("t").unwrap();
    assert_eq!(table.schema().columns[0].ty, expected);
}
12050
#[test]
fn create_table_vector_using_sq8_succeeds() {
    // v6.0.1 step 3: `USING SQ8` is accepted by CREATE TABLE and the
    // catalog records the SQ8 encoding on the column type.
    let expected = DataType::Vector {
        dim: 8,
        encoding: VecEncoding::Sq8,
    };
    let mut engine = Engine::new();
    engine
        .execute("CREATE TABLE t (v VECTOR(8) USING SQ8)")
        .unwrap();
    let table = engine.catalog().get("t").unwrap();
    assert_eq!(table.schema().columns[0].ty, expected);
}
12067
#[test]
fn insert_into_sq8_column_quantises_f32_payload() {
    // v6.0.1 step 3: an f32 vector literal inserted into an SQ8 column
    // must be stored as `Value::Sq8Vector`, not as the raw f32 buffer.
    let mut engine = Engine::new();
    engine
        .execute("CREATE TABLE t (v VECTOR(4) USING SQ8)")
        .unwrap();
    engine
        .execute("INSERT INTO t VALUES ([0.0, 0.25, 0.5, 1.0])")
        .unwrap();

    let table = engine.catalog().get("t").unwrap();
    let rows = table.rows();
    assert_eq!(rows.len(), 1);

    let cell = &rows[0].values[0];
    let Value::Sq8Vector(q) = cell else {
        panic!("expected Sq8Vector cell, got {cell:?}")
    };
    assert_eq!(q.bytes.len(), 4);
    // The payload spans 0.0..=1.0, so those are the expected min/max.
    assert!((q.min - 0.0).abs() < 1e-6);
    assert!((q.max - 1.0).abs() < 1e-6);
}
12092
#[test]
fn create_table_vector_using_half_succeeds_and_insert_converts_to_f16() {
    // v6.0.3: CREATE TABLE accepts `USING HALF`, and an inserted f32
    // vector literal is stored as `Value::HalfVector`. The inputs
    // 0.0 / 0.25 / 0.5 / 1.0 are expected to round-trip within 1e-6.
    let mut engine = Engine::new();
    engine
        .execute("CREATE TABLE t (v VECTOR(4) USING HALF)")
        .unwrap();
    engine
        .execute("INSERT INTO t VALUES ([0.0, 0.25, 0.5, 1.0])")
        .unwrap();

    let table = engine.catalog().get("t").unwrap();
    let rows = table.rows();
    assert_eq!(rows.len(), 1);

    let cell = &rows[0].values[0];
    let Value::HalfVector(half) = cell else {
        panic!("expected HalfVector cell, got {cell:?}")
    };
    assert_eq!(half.dim(), 4);
    let round_tripped = half.to_f32_vec();
    let expected = [0.0_f32, 0.25, 0.5, 1.0];
    for (got, want) in round_tripped.iter().zip(expected.iter()) {
        assert!(
            (got - want).abs() < 1e-6,
            "{got} vs {want} should be exact on f16 grid"
        );
    }
}
12123
#[test]
fn alter_index_rebuild_in_place_succeeds() {
    // v6.0.4: a bare `ALTER INDEX … REBUILD` (no encoding clause) on an
    // hnsw index must succeed and leave the column's declared encoding
    // untouched.
    let mut engine = Engine::new();
    engine
        .execute("CREATE TABLE t (id INT NOT NULL, v VECTOR(3) NOT NULL)")
        .unwrap();
    for i in 0..8_i32 {
        #[allow(clippy::cast_precision_loss)]
        let base = (i as f32) * 0.1;
        let b1 = base + 0.01;
        let b2 = base + 0.02;
        let insert = alloc::format!("INSERT INTO t VALUES ({i}, [{base}, {b1}, {b2}])");
        engine.execute(&insert).unwrap();
    }
    engine
        .execute("CREATE INDEX t_idx ON t USING hnsw (v)")
        .unwrap();
    engine.execute("ALTER INDEX t_idx REBUILD").unwrap();

    // Without an encoding clause the schema must still report F32.
    let expected = DataType::Vector {
        dim: 3,
        encoding: VecEncoding::F32,
    };
    assert_eq!(
        engine.catalog().get("t").unwrap().schema().columns[1].ty,
        expected
    );
}
12154
#[test]
fn alter_index_rebuild_with_encoding_switches_cell_type() {
    // v6.0.4: `REBUILD WITH (encoding = SQ8)` must switch both the
    // schema's declared encoding and the stored cell to SQ8.
    let setup = [
        "CREATE TABLE t (id INT NOT NULL, v VECTOR(4) NOT NULL)",
        "INSERT INTO t VALUES (1, [0.0, 0.25, 0.5, 1.0])",
        "CREATE INDEX t_idx ON t USING hnsw (v)",
        "ALTER INDEX t_idx REBUILD WITH (encoding = SQ8)",
    ];
    let mut engine = Engine::new();
    for sql in setup {
        engine.execute(sql).unwrap();
    }

    let table = engine.catalog().get("t").unwrap();
    let expected = DataType::Vector {
        dim: 4,
        encoding: VecEncoding::Sq8,
    };
    assert_eq!(table.schema().columns[1].ty, expected);
    assert!(matches!(table.rows()[0].values[1], Value::Sq8Vector(_)));
}
12179
#[test]
fn alter_index_rebuild_unknown_index_errors() {
    // REBUILD on a name no index carries must report `IndexNotFound`
    // with that name.
    let mut engine = Engine::new();
    let err = engine.execute("ALTER INDEX nope REBUILD").unwrap_err();
    let is_index_not_found = matches!(
        &err,
        EngineError::Storage(StorageError::IndexNotFound { name }) if name == "nope"
    );
    assert!(is_index_not_found, "got: {err}");
}
12192
#[test]
fn alter_index_rebuild_on_btree_index_errors() {
    // v6.0.4: REBUILD on an index created without `USING hnsw` is
    // expected to fail with a storage-level `Unsupported` error.
    let mut engine = Engine::new();
    for sql in [
        "CREATE TABLE t (id INT NOT NULL)",
        "INSERT INTO t VALUES (1)",
        "CREATE INDEX t_idx ON t (id)",
    ] {
        engine.execute(sql).unwrap();
    }
    let err = engine.execute("ALTER INDEX t_idx REBUILD").unwrap_err();
    let is_unsupported = matches!(&err, EngineError::Storage(StorageError::Unsupported(_)));
    assert!(is_unsupported, "got: {err}");
}
12207
#[test]
fn prepared_insert_substitutes_placeholders() {
    // v6.1.1: `prepare()` is called once and the resulting statement is
    // executed repeatedly with different `$1` / `$2` bindings. The rows
    // read back through a simple-query SELECT must carry exactly the
    // bound values. Checking only the row count would not catch
    // placeholders being swapped or bound to the wrong parameter.
    let inputs = [(1, "alice"), (2, "bob"), (3, "carol")];

    let mut e = Engine::new();
    e.execute("CREATE TABLE t (id INT NOT NULL, name TEXT NOT NULL)")
        .unwrap();
    let stmt = e.prepare("INSERT INTO t VALUES ($1, $2)").unwrap();
    for (id, name) in inputs {
        e.execute_prepared(stmt.clone(), &[Value::Int(id), Value::Text(name.into())])
            .unwrap();
    }

    // Read back via simple-query SELECT.
    let rows_result = e.execute("SELECT id, name FROM t").unwrap();
    let QueryResult::Rows { rows, .. } = rows_result else {
        panic!("expected Rows")
    };
    assert_eq!(rows.len(), inputs.len());
    // Rows come back in insertion order, so they pair up with `inputs`.
    for (row, (id, name)) in rows.iter().zip(inputs) {
        assert_eq!(
            row.values,
            vec![Value::Int(id), Value::Text(name.into())]
        );
    }
}
12230
#[test]
fn prepared_select_with_placeholder_filters_rows() {
    // Rows are (i, i * 7) for i in 0..10; binding $1 = 35 must select
    // only the row with id 5.
    let mut engine = Engine::new();
    engine
        .execute("CREATE TABLE t (id INT NOT NULL, v INT NOT NULL)")
        .unwrap();
    for i in 0..10_i32 {
        let insert = alloc::format!("INSERT INTO t VALUES ({i}, {})", i * 7);
        engine.execute(&insert).unwrap();
    }

    let stmt = engine.prepare("SELECT id FROM t WHERE v = $1").unwrap();
    let result = engine.execute_prepared(stmt, &[Value::Int(35)]).unwrap();
    let rows = match result {
        QueryResult::Rows { rows, .. } => rows,
        _ => panic!("expected Rows"),
    };
    assert_eq!(rows.len(), 1);
    assert_eq!(rows[0].values[0], Value::Int(5));
}
12249
#[test]
fn prepared_too_few_params_errors() {
    // Executing a `$1` statement with an empty parameter slice must
    // fail with `PlaceholderOutOfRange { n: 1, bound: 0 }`.
    let mut engine = Engine::new();
    engine.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
    let stmt = engine.prepare("INSERT INTO t VALUES ($1)").unwrap();
    let err = engine.execute_prepared(stmt, &[]).unwrap_err();
    let is_out_of_range = matches!(
        &err,
        EngineError::Eval(EvalError::PlaceholderOutOfRange { n: 1, bound: 0 })
    );
    assert!(is_out_of_range, "got: {err}");
}
12264
#[test]
fn insert_into_half_column_dim_mismatch_errors() {
    // A 2-element literal inserted into a `VECTOR(4) USING HALF` column
    // must be rejected as a storage-level `TypeMismatch`. The failure
    // message prints the actual error so a regression is diagnosable
    // from the test output alone.
    let mut e = Engine::new();
    e.execute("CREATE TABLE t (v VECTOR(4) USING HALF)")
        .unwrap();
    let err = e.execute("INSERT INTO t VALUES ([1.0, 2.0])").unwrap_err();
    assert!(
        matches!(
            &err,
            EngineError::Storage(StorageError::TypeMismatch { .. })
        ),
        "got: {err}",
    );
}
12276
#[test]
fn insert_into_sq8_column_dim_mismatch_errors() {
    // A 2-element literal does not satisfy the Vector→Sq8 coercion
    // guard for a VECTOR(4) column, so the insert must surface a
    // storage-level `TypeMismatch`.
    let mut engine = Engine::new();
    engine
        .execute("CREATE TABLE t (v VECTOR(4) USING SQ8)")
        .unwrap();
    let err = engine
        .execute("INSERT INTO t VALUES ([1.0, 2.0])")
        .unwrap_err();
    let is_type_mismatch = matches!(
        &err,
        EngineError::Storage(StorageError::TypeMismatch { .. })
    );
    assert!(is_type_mismatch, "got: {err}");
}
12294
#[test]
fn create_table_duplicate_errors() {
    // Creating the same table twice must fail with `DuplicateTable`
    // naming the table.
    let ddl = "CREATE TABLE foo (a INT)";
    let mut engine = Engine::new();
    engine.execute(ddl).unwrap();
    let err = engine.execute(ddl).unwrap_err();
    assert!(matches!(
        &err,
        EngineError::Storage(StorageError::DuplicateTable { name }) if name == "foo"
    ));
}
12305
#[test]
fn insert_into_unknown_table_errors() {
    // INSERT into a table that was never created must report
    // `TableNotFound` with the offending name.
    let mut engine = Engine::new();
    let err = engine.execute("INSERT INTO ghost VALUES (1)").unwrap_err();
    assert!(matches!(
        &err,
        EngineError::Storage(StorageError::TableNotFound { name }) if name == "ghost"
    ));
}
12315
#[test]
fn insert_happy_path_reports_one_affected() {
    // A single-row INSERT reports one affected row and the table then
    // holds exactly one row.
    let mut engine = Engine::new();
    engine.execute("CREATE TABLE foo (a INT NOT NULL)").unwrap();
    let outcome = engine.execute("INSERT INTO foo VALUES (42)").unwrap();
    let affected = unwrap_command_ok(&outcome);
    assert_eq!(affected, 1);
    let stored = engine.catalog().get("foo").unwrap().row_count();
    assert_eq!(stored, 1);
}
12324
#[test]
fn insert_arity_mismatch_propagates() {
    // One value for a two-column table must surface the storage
    // layer's `ArityMismatch`.
    let mut engine = Engine::new();
    engine.execute("CREATE TABLE foo (a INT, b TEXT)").unwrap();
    let err = engine.execute("INSERT INTO foo VALUES (1)").unwrap_err();
    let is_arity_mismatch = matches!(
        err,
        EngineError::Storage(StorageError::ArityMismatch { .. })
    );
    assert!(is_arity_mismatch);
}
12335
#[test]
fn insert_negative_integer_via_unary_minus() {
    // `-7` in a VALUES list must be stored as `Value::Int(-7)`.
    let mut engine = Engine::new();
    for sql in [
        "CREATE TABLE foo (a INT NOT NULL)",
        "INSERT INTO foo VALUES (-7)",
    ] {
        engine.execute(sql).unwrap();
    }
    let table = engine.catalog().get("foo").unwrap();
    assert_eq!(table.rows()[0].values[0], Value::Int(-7));
}
12344
#[test]
fn insert_non_literal_expr_unsupported() {
    // A compound expression (`1 + 2`) in VALUES is expected to be
    // rejected as `Unsupported`.
    let mut engine = Engine::new();
    engine.execute("CREATE TABLE foo (a INT NOT NULL)").unwrap();
    let outcome = engine.execute("INSERT INTO foo VALUES (1 + 2)");
    assert!(matches!(outcome.unwrap_err(), EngineError::Unsupported(_)));
}
12352
#[test]
fn select_star_returns_all_rows_in_insertion_order() {
    // `SELECT *` returns every column and every row, in the order the
    // rows were inserted.
    let mut engine = Engine::new();
    for sql in [
        "CREATE TABLE foo (a INT NOT NULL, b TEXT NOT NULL)",
        "INSERT INTO foo VALUES (1, 'one')",
        "INSERT INTO foo VALUES (2, 'two')",
        "INSERT INTO foo VALUES (3, 'three')",
    ] {
        engine.execute(sql).unwrap();
    }

    let (columns, rows) = match engine.execute("SELECT * FROM foo").unwrap() {
        QueryResult::Rows { columns, rows } => (columns, rows),
        _ => panic!("expected Rows"),
    };
    assert_eq!(columns.len(), 2);
    assert_eq!(columns[0].name, "a");
    assert_eq!(rows.len(), 3);
    let second_row = vec![Value::Int(2), Value::Text("two".into())];
    assert_eq!(rows[1].values, second_row);
}
12374
#[test]
fn select_star_on_empty_table_returns_zero_rows() {
    // SELECT on a freshly created table yields a `Rows` result with no
    // rows in it.
    let mut engine = Engine::new();
    engine.execute("CREATE TABLE foo (a INT)").unwrap();
    let result = engine.execute("SELECT * FROM foo").unwrap();
    let QueryResult::Rows { rows, .. } = result else {
        panic!("expected Rows")
    };
    assert!(rows.is_empty());
}
12385
12386    // --- v0.4: WHERE + projection ------------------------------------------
12387
12388    fn make_three_row_users(e: &mut Engine) {
12389        e.execute("CREATE TABLE users (id INT NOT NULL, name TEXT NOT NULL, score INT)")
12390            .unwrap();
12391        e.execute("INSERT INTO users VALUES (1, 'alice', 90)")
12392            .unwrap();
12393        e.execute("INSERT INTO users VALUES (2, 'bob', NULL)")
12394            .unwrap();
12395        e.execute("INSERT INTO users VALUES (3, 'cara', 70)")
12396            .unwrap();
12397    }
12398
12399    fn unwrap_rows(r: QueryResult) -> (Vec<ColumnSchema>, Vec<Row>) {
12400        match r {
12401            QueryResult::Rows { columns, rows } => (columns, rows),
12402            QueryResult::CommandOk { .. } => panic!("expected Rows"),
12403        }
12404    }
12405
#[test]
fn where_filter_passes_only_true_rows() {
    // `id > 1` keeps ids 2 and 3, in that order.
    let mut engine = Engine::new();
    make_three_row_users(&mut engine);
    let result = engine.execute("SELECT * FROM users WHERE id > 1").unwrap();
    let (_, rows) = unwrap_rows(result);
    assert_eq!(rows.len(), 2);
    for (row, expected_id) in rows.iter().zip([2, 3]) {
        assert_eq!(row.values[0], Value::Int(expected_id));
    }
}
12416
#[test]
fn where_with_null_result_filters_out_row() {
    // bob's score is NULL, so `score > 80` is NULL for him and the row
    // is dropped; cara (70) fails the comparison. Only alice remains.
    let mut engine = Engine::new();
    make_three_row_users(&mut engine);
    let (_, rows) = unwrap_rows(
        engine
            .execute("SELECT * FROM users WHERE score > 80")
            .unwrap(),
    );
    assert_eq!(rows.len(), 1);
    let name_cell = &rows[0].values[1];
    assert_eq!(*name_cell, Value::Text("alice".into()));
}
12427
#[test]
fn projection_named_columns() {
    // Projecting `name, score` yields exactly those two columns, in
    // that order, for every row.
    let mut engine = Engine::new();
    make_three_row_users(&mut engine);
    let (cols, rows) = unwrap_rows(engine.execute("SELECT name, score FROM users").unwrap());

    assert_eq!(cols.len(), 2);
    for (col, expected_name) in cols.iter().zip(["name", "score"]) {
        assert_eq!(col.name, expected_name);
    }
    assert_eq!(rows.len(), 3);
    let first_row = vec![Value::Text("alice".into()), Value::Int(90)];
    assert_eq!(rows[0].values, first_row);
}
12443
#[test]
fn projection_with_column_alias() {
    // `AS who` renames the output column; the value is unaffected.
    let mut engine = Engine::new();
    make_three_row_users(&mut engine);
    let result = engine
        .execute("SELECT name AS who FROM users WHERE id = 1")
        .unwrap();
    let (cols, rows) = unwrap_rows(result);
    assert_eq!(cols[0].name, "who");
    assert_eq!(rows.len(), 1);
    let expected = Value::Text("alice".into());
    assert_eq!(rows[0].values[0], expected);
}
12456
#[test]
fn qualified_column_with_table_alias_resolves() {
    // Columns qualified with the FROM-clause alias `u` resolve in both
    // the projection and the WHERE clause.
    let mut engine = Engine::new();
    make_three_row_users(&mut engine);
    let query = "SELECT u.id, u.name FROM users AS u WHERE u.id < 3";
    let (cols, rows) = unwrap_rows(engine.execute(query).unwrap());
    assert_eq!(cols.len(), 2);
    assert_eq!(rows.len(), 2);
}
12468
#[test]
fn qualified_column_with_wrong_alias_errors() {
    // Qualifier `x` does not match the alias `u`, so evaluation must
    // fail with `UnknownQualifier` naming `x`.
    let mut engine = Engine::new();
    make_three_row_users(&mut engine);
    let err = engine.execute("SELECT x.id FROM users AS u").unwrap_err();
    assert!(matches!(
        &err,
        EngineError::Eval(EvalError::UnknownQualifier { qualifier }) if qualifier == "x"
    ));
}
12479
#[test]
fn select_unknown_column_errors_in_projection() {
    // Projecting a column the table does not have must fail with
    // `ColumnNotFound` carrying that column's name.
    let mut engine = Engine::new();
    make_three_row_users(&mut engine);
    let err = engine.execute("SELECT ghost FROM users").unwrap_err();
    assert!(matches!(
        &err,
        EngineError::Eval(EvalError::ColumnNotFound { name }) if name == "ghost"
    ));
}
12490
#[test]
fn where_unknown_column_errors() {
    // An unknown column in the WHERE clause is an eval-level
    // `ColumnNotFound`.
    let mut engine = Engine::new();
    make_three_row_users(&mut engine);
    let outcome = engine.execute("SELECT * FROM users WHERE ghost = 1");
    let is_column_not_found = matches!(
        outcome.unwrap_err(),
        EngineError::Eval(EvalError::ColumnNotFound { .. })
    );
    assert!(is_column_not_found);
}
12503
#[test]
fn expression_projection_evaluates_and_renders() {
    // A compound expression in the SELECT list is evaluated for each
    // row: with one row in `t`, `SELECT 1 + 2` must produce a single
    // cell holding `Value::Int(3)`.
    let mut engine = Engine::new();
    for sql in ["CREATE TABLE t (a INT NOT NULL)", "INSERT INTO t VALUES (3)"] {
        engine.execute(sql).unwrap();
    }
    let result = engine.execute("SELECT 1 + 2 FROM t").unwrap();
    let (_, rows) = unwrap_rows(result);
    assert_eq!(rows.len(), 1);
    assert_eq!(rows[0].values[0], Value::Int(3));
}
12517
#[test]
fn select_unknown_table_errors() {
    // SELECT from a table that does not exist must report
    // `TableNotFound`.
    let mut engine = Engine::new();
    let outcome = engine.execute("SELECT * FROM ghost");
    let is_table_not_found = matches!(
        outcome.unwrap_err(),
        EngineError::Storage(StorageError::TableNotFound { .. })
    );
    assert!(is_table_not_found);
}
12527
#[test]
fn invalid_sql_returns_parse_error() {
    // v4.4: UPDATE is real SQL now, so the parse-error path is
    // exercised with input that is not a statement at all.
    let garbage = "THIS_IS_NOT_A_KEYWORD foo bar baz";
    let mut engine = Engine::new();
    let outcome = engine.execute(garbage);
    assert!(matches!(outcome.unwrap_err(), EngineError::Parse(_)));
}
12536
12537    // --- v0.8 CREATE INDEX + index seek ------------------------------------
12538
#[test]
fn create_index_registers_on_table() {
    // After CREATE INDEX the table lists exactly one index, carrying
    // the name given in the statement.
    let mut engine = Engine::new();
    make_three_row_users(&mut engine);
    engine
        .execute("CREATE INDEX by_name ON users (name)")
        .unwrap();
    let table = engine.catalog().get("users").unwrap();
    let indices = table.indices();
    assert_eq!(indices.len(), 1);
    assert_eq!(indices[0].name, "by_name");
}
12548
#[test]
fn create_index_on_unknown_table_errors() {
    // Indexing a table that does not exist must report `TableNotFound`.
    let mut engine = Engine::new();
    let outcome = engine.execute("CREATE INDEX i ON ghost (a)");
    let is_table_not_found = matches!(
        outcome.unwrap_err(),
        EngineError::Storage(StorageError::TableNotFound { .. })
    );
    assert!(is_table_not_found);
}
12558
#[test]
fn create_index_on_unknown_column_errors() {
    // Indexing a column the table does not have must report the
    // storage layer's `ColumnNotFound`.
    let mut engine = Engine::new();
    make_three_row_users(&mut engine);
    let outcome = engine.execute("CREATE INDEX i ON users (ghost)");
    let is_column_not_found = matches!(
        outcome.unwrap_err(),
        EngineError::Storage(StorageError::ColumnNotFound { .. })
    );
    assert!(is_column_not_found);
}
12569
#[test]
fn select_eq_uses_index_returns_same_rows_as_scan() {
    // An index is a planner optimisation, not a semantic change: the
    // same equality query must return the same rows from an engine
    // with an index on `id` and from one without.
    let query = "SELECT * FROM users WHERE id = 2";

    let mut scan_engine = Engine::new();
    make_three_row_users(&mut scan_engine);
    let (_, scan_rows) = unwrap_rows(scan_engine.execute(query).unwrap());

    let mut indexed_engine = Engine::new();
    make_three_row_users(&mut indexed_engine);
    indexed_engine
        .execute("CREATE INDEX by_id ON users (id)")
        .unwrap();
    let (_, indexed_rows) = unwrap_rows(indexed_engine.execute(query).unwrap());

    assert_eq!(scan_rows, indexed_rows);
    assert_eq!(indexed_rows.len(), 1);
}
12587
12588    #[test]
12589    fn select_eq_with_no_matching_index_value_returns_empty() {
12590        let mut e = Engine::new();
12591        make_three_row_users(&mut e);
12592        e.execute("CREATE INDEX by_id ON users (id)").unwrap();
12593        let (_, rows) = unwrap_rows(e.execute("SELECT * FROM users WHERE id = 999").unwrap());
12594        assert_eq!(rows.len(), 0);
12595    }
12596
12597    // --- v0.9 transactions -------------------------------------------------
12598
12599    #[test]
12600    fn begin_sets_in_transaction_flag() {
12601        let mut e = Engine::new();
12602        assert!(!e.in_transaction());
12603        e.execute("BEGIN").unwrap();
12604        assert!(e.in_transaction());
12605    }
12606
12607    #[test]
12608    fn double_begin_errors() {
12609        let mut e = Engine::new();
12610        e.execute("BEGIN").unwrap();
12611        let err = e.execute("BEGIN").unwrap_err();
12612        assert_eq!(err, EngineError::TransactionAlreadyOpen);
12613    }
12614
12615    #[test]
12616    fn commit_without_begin_errors() {
12617        let mut e = Engine::new();
12618        let err = e.execute("COMMIT").unwrap_err();
12619        assert_eq!(err, EngineError::NoActiveTransaction);
12620    }
12621
12622    #[test]
12623    fn rollback_without_begin_errors() {
12624        let mut e = Engine::new();
12625        let err = e.execute("ROLLBACK").unwrap_err();
12626        assert_eq!(err, EngineError::NoActiveTransaction);
12627    }
12628
12629    #[test]
12630    fn commit_applies_shadow_to_committed_catalog() {
12631        let mut e = Engine::new();
12632        e.execute("CREATE TABLE t (v INT NOT NULL)").unwrap();
12633        e.execute("BEGIN").unwrap();
12634        e.execute("INSERT INTO t VALUES (1)").unwrap();
12635        e.execute("INSERT INTO t VALUES (2)").unwrap();
12636        e.execute("COMMIT").unwrap();
12637        assert!(!e.in_transaction());
12638        assert_eq!(e.catalog().get("t").unwrap().row_count(), 2);
12639    }
12640
12641    #[test]
12642    fn rollback_discards_shadow() {
12643        let mut e = Engine::new();
12644        e.execute("CREATE TABLE t (v INT NOT NULL)").unwrap();
12645        e.execute("BEGIN").unwrap();
12646        e.execute("INSERT INTO t VALUES (1)").unwrap();
12647        e.execute("INSERT INTO t VALUES (2)").unwrap();
12648        e.execute("ROLLBACK").unwrap();
12649        assert!(!e.in_transaction());
12650        assert_eq!(e.catalog().get("t").unwrap().row_count(), 0);
12651    }
12652
12653    #[test]
12654    fn select_during_tx_sees_uncommitted_writes_own_session() {
12655        // The shadow catalog is read by SELECTs while a TX is open — the
12656        // session can see its own pending writes.
12657        let mut e = Engine::new();
12658        e.execute("CREATE TABLE t (v INT NOT NULL)").unwrap();
12659        e.execute("BEGIN").unwrap();
12660        e.execute("INSERT INTO t VALUES (42)").unwrap();
12661        let (_, rows) = unwrap_rows(e.execute("SELECT * FROM t").unwrap());
12662        assert_eq!(rows.len(), 1);
12663        assert_eq!(rows[0].values[0], Value::Int(42));
12664    }
12665
12666    #[test]
12667    fn snapshot_with_no_users_is_bare_catalog_format() {
12668        let mut e = Engine::new();
12669        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
12670        let bytes = e.snapshot();
12671        assert_eq!(
12672            &bytes[..8],
12673            b"SPGDB001",
12674            "must be the bare v3.x catalog magic"
12675        );
12676        let e2 = Engine::restore_envelope(&bytes).unwrap();
12677        assert!(e2.users().is_empty());
12678        assert_eq!(e2.catalog().table_count(), 1);
12679    }
12680
12681    #[test]
12682    fn snapshot_with_users_round_trips_both_via_envelope() {
12683        let mut e = Engine::new();
12684        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
12685        e.create_user("alice", "pw1", Role::Admin, [9; 16]).unwrap();
12686        e.create_user("bob", "pw2", Role::ReadOnly, [5; 16])
12687            .unwrap();
12688        let bytes = e.snapshot();
12689        assert_eq!(&bytes[..8], b"SPGENV01", "must be the v4.1 envelope magic");
12690        let e2 = Engine::restore_envelope(&bytes).unwrap();
12691        assert_eq!(e2.users().len(), 2);
12692        assert_eq!(e2.verify_user("alice", "pw1"), Some(Role::Admin));
12693        assert_eq!(e2.verify_user("bob", "pw2"), Some(Role::ReadOnly));
12694        assert_eq!(e2.verify_user("alice", "wrong"), None);
12695        assert_eq!(e2.catalog().table_count(), 1);
12696    }
12697
12698    #[test]
12699    fn ddl_inside_tx_also_rolled_back() {
12700        let mut e = Engine::new();
12701        e.execute("BEGIN").unwrap();
12702        e.execute("CREATE TABLE t (v INT)").unwrap();
12703        // Visible inside the TX.
12704        e.execute("SELECT * FROM t").unwrap();
12705        e.execute("ROLLBACK").unwrap();
12706        // Gone after rollback.
12707        let err = e.execute("SELECT * FROM t").unwrap_err();
12708        assert!(matches!(
12709            err,
12710            EngineError::Storage(StorageError::TableNotFound { .. })
12711        ));
12712    }
12713
12714    // ── v6.1.2: CREATE / DROP PUBLICATION (engine-side) ──────
12715
12716    #[test]
12717    fn create_publication_lands_in_catalog() {
12718        let mut e = Engine::new();
12719        assert!(e.publications().is_empty());
12720        e.execute("CREATE PUBLICATION pub_a").unwrap();
12721        assert_eq!(e.publications().len(), 1);
12722        assert!(e.publications().contains("pub_a"));
12723    }
12724
12725    #[test]
12726    fn create_publication_duplicate_errors() {
12727        let mut e = Engine::new();
12728        e.execute("CREATE PUBLICATION pub_a").unwrap();
12729        let err = e.execute("CREATE PUBLICATION pub_a").unwrap_err();
12730        assert!(
12731            alloc::format!("{err:?}").contains("DuplicateName"),
12732            "got {err:?}"
12733        );
12734    }
12735
12736    #[test]
12737    fn drop_publication_silent_when_absent() {
12738        let mut e = Engine::new();
12739        // PG-compatible: DROP a publication that doesn't exist
12740        // succeeds (no-op) but reports zero affected.
12741        let r = e.execute("DROP PUBLICATION nope").unwrap();
12742        match r {
12743            QueryResult::CommandOk { affected, .. } => assert_eq!(affected, 0),
12744            other => panic!("expected CommandOk, got {other:?}"),
12745        }
12746    }
12747
12748    #[test]
12749    fn drop_publication_present_reports_one_affected() {
12750        let mut e = Engine::new();
12751        e.execute("CREATE PUBLICATION pub_a").unwrap();
12752        let r = e.execute("DROP PUBLICATION pub_a").unwrap();
12753        match r {
12754            QueryResult::CommandOk {
12755                affected,
12756                modified_catalog,
12757            } => {
12758                assert_eq!(affected, 1);
12759                assert!(modified_catalog);
12760            }
12761            other => panic!("expected CommandOk, got {other:?}"),
12762        }
12763        assert!(e.publications().is_empty());
12764    }
12765
12766    #[test]
12767    fn publications_persist_across_snapshot_restore() {
12768        // The persist-across-restart ship-gate at the engine layer —
12769        // snapshot → restore_envelope round trip must preserve the
12770        // publication catalog. The spg-server e2e covers the
12771        // process-restart variant.
12772        let mut e = Engine::new();
12773        e.execute("CREATE PUBLICATION pub_a").unwrap();
12774        e.execute("CREATE PUBLICATION pub_b FOR ALL TABLES")
12775            .unwrap();
12776        let snap = e.snapshot();
12777        let e2 = Engine::restore_envelope(&snap).unwrap();
12778        assert_eq!(e2.publications().len(), 2);
12779        assert!(e2.publications().contains("pub_a"));
12780        assert!(e2.publications().contains("pub_b"));
12781    }
12782
12783    #[test]
12784    fn create_publication_allowed_inside_transaction() {
12785        // v6.1.4 dropped the v6.1.2 in-TX guard — PG allows
12786        // CREATE PUBLICATION inside a TX and the auto-commit
12787        // wrap path needs the same allowance.
12788        let mut e = Engine::new();
12789        e.execute("BEGIN").unwrap();
12790        e.execute("CREATE PUBLICATION pub_a").unwrap();
12791        e.execute("COMMIT").unwrap();
12792        assert!(e.publications().contains("pub_a"));
12793    }
12794
12795    // ── v6.1.3: SHOW PUBLICATIONS + FOR-list variants ───────
12796
12797    #[test]
12798    fn create_publication_for_table_list_lands_with_scope() {
12799        let mut e = Engine::new();
12800        e.execute("CREATE TABLE t1 (id INT NOT NULL)").unwrap();
12801        e.execute("CREATE TABLE t2 (id INT NOT NULL)").unwrap();
12802        e.execute("CREATE PUBLICATION pub_a FOR TABLE t1, t2")
12803            .unwrap();
12804        let scope = e.publications().get("pub_a").cloned();
12805        let Some(spg_sql::ast::PublicationScope::ForTables(ts)) = scope else {
12806            panic!("expected ForTables scope, got {scope:?}")
12807        };
12808        assert_eq!(ts, alloc::vec!["t1".to_string(), "t2".to_string()]);
12809    }
12810
12811    #[test]
12812    fn create_publication_all_tables_except_lands_with_scope() {
12813        let mut e = Engine::new();
12814        e.execute("CREATE PUBLICATION pub_a FOR ALL TABLES EXCEPT t3")
12815            .unwrap();
12816        let scope = e.publications().get("pub_a").cloned();
12817        let Some(spg_sql::ast::PublicationScope::AllTablesExcept(ts)) = scope else {
12818            panic!("expected AllTablesExcept scope, got {scope:?}")
12819        };
12820        assert_eq!(ts, alloc::vec!["t3".to_string()]);
12821    }
12822
12823    #[test]
12824    fn show_publications_empty_returns_zero_rows() {
12825        let e = Engine::new();
12826        let r = e.execute_readonly("SHOW PUBLICATIONS").unwrap();
12827        let QueryResult::Rows { rows, columns } = r else {
12828            panic!()
12829        };
12830        assert!(rows.is_empty());
12831        assert_eq!(columns.len(), 3);
12832        assert_eq!(columns[0].name, "name");
12833        assert_eq!(columns[1].name, "scope");
12834        assert_eq!(columns[2].name, "table_count");
12835    }
12836
12837    #[test]
12838    fn show_publications_returns_one_row_per_publication_ordered_by_name() {
12839        let mut e = Engine::new();
12840        e.execute("CREATE PUBLICATION z_pub").unwrap();
12841        e.execute("CREATE PUBLICATION a_pub FOR TABLE t1, t2")
12842            .unwrap();
12843        e.execute("CREATE PUBLICATION m_pub FOR ALL TABLES EXCEPT bad")
12844            .unwrap();
12845        let r = e.execute_readonly("SHOW PUBLICATIONS").unwrap();
12846        let QueryResult::Rows { rows, .. } = r else {
12847            panic!()
12848        };
12849        assert_eq!(rows.len(), 3);
12850        // Alphabetical order: a_pub, m_pub, z_pub.
12851        let names: Vec<&str> = rows
12852            .iter()
12853            .map(|r| {
12854                if let Value::Text(s) = &r.values[0] {
12855                    s.as_str()
12856                } else {
12857                    panic!()
12858                }
12859            })
12860            .collect();
12861        assert_eq!(names, alloc::vec!["a_pub", "m_pub", "z_pub"]);
12862        // Row 0 — a_pub scope summary + table_count = 2.
12863        match &rows[0].values[1] {
12864            Value::Text(s) => assert_eq!(s, "FOR TABLE t1, t2"),
12865            other => panic!("expected Text, got {other:?}"),
12866        }
12867        assert_eq!(rows[0].values[2], Value::Int(2));
12868        // Row 1 — m_pub.
12869        match &rows[1].values[1] {
12870            Value::Text(s) => assert_eq!(s, "FOR ALL TABLES EXCEPT bad"),
12871            other => panic!("expected Text, got {other:?}"),
12872        }
12873        assert_eq!(rows[1].values[2], Value::Int(1));
12874        // Row 2 — z_pub (AllTables → NULL count).
12875        match &rows[2].values[1] {
12876            Value::Text(s) => assert_eq!(s, "FOR ALL TABLES"),
12877            other => panic!("expected Text, got {other:?}"),
12878        }
12879        assert_eq!(rows[2].values[2], Value::Null);
12880    }
12881
12882    #[test]
12883    fn for_list_scopes_persist_across_snapshot() {
12884        // The v6.1.2 envelope-v3 round-trip exercised AllTables;
12885        // v6.1.3 needs the scope-1 / scope-2 tags to survive too.
12886        let mut e = Engine::new();
12887        e.execute("CREATE PUBLICATION p1 FOR TABLE t1, t2").unwrap();
12888        e.execute("CREATE PUBLICATION p2 FOR ALL TABLES EXCEPT bad, worse")
12889            .unwrap();
12890        let snap = e.snapshot();
12891        let e2 = Engine::restore_envelope(&snap).unwrap();
12892        assert_eq!(e2.publications().len(), 2);
12893        let p1 = e2.publications().get("p1").cloned();
12894        let Some(spg_sql::ast::PublicationScope::ForTables(ts)) = p1 else {
12895            panic!("p1 scope lost: {p1:?}")
12896        };
12897        assert_eq!(ts, alloc::vec!["t1".to_string(), "t2".to_string()]);
12898        let p2 = e2.publications().get("p2").cloned();
12899        let Some(spg_sql::ast::PublicationScope::AllTablesExcept(ts)) = p2 else {
12900            panic!("p2 scope lost: {p2:?}")
12901        };
12902        assert_eq!(ts, alloc::vec!["bad".to_string(), "worse".to_string()]);
12903    }
12904
12905    // ── v6.1.4: CREATE / DROP SUBSCRIPTION + SHOW + envelope v4 ─
12906
12907    #[test]
12908    fn create_subscription_lands_in_catalog_with_defaults() {
12909        let mut e = Engine::new();
12910        e.execute(
12911            "CREATE SUBSCRIPTION sub_a CONNECTION 'host=127.0.0.1 port=20002' PUBLICATION pub_a",
12912        )
12913        .unwrap();
12914        let s = e.subscriptions().get("sub_a").cloned().expect("present");
12915        assert_eq!(s.conn_str, "host=127.0.0.1 port=20002");
12916        assert_eq!(s.publications, alloc::vec!["pub_a".to_string()]);
12917        assert!(s.enabled);
12918        assert_eq!(s.last_received_pos, 0);
12919    }
12920
12921    #[test]
12922    fn create_subscription_duplicate_name_errors() {
12923        let mut e = Engine::new();
12924        e.execute("CREATE SUBSCRIPTION s CONNECTION 'host=x' PUBLICATION p")
12925            .unwrap();
12926        let err = e
12927            .execute("CREATE SUBSCRIPTION s CONNECTION 'host=y' PUBLICATION p")
12928            .unwrap_err();
12929        assert!(
12930            alloc::format!("{err:?}").contains("DuplicateName"),
12931            "got {err:?}"
12932        );
12933    }
12934
12935    #[test]
12936    fn drop_subscription_silent_when_absent() {
12937        let mut e = Engine::new();
12938        let r = e.execute("DROP SUBSCRIPTION never").unwrap();
12939        match r {
12940            QueryResult::CommandOk { affected, .. } => assert_eq!(affected, 0),
12941            other => panic!("expected CommandOk, got {other:?}"),
12942        }
12943    }
12944
12945    #[test]
12946    fn subscription_advance_updates_last_pos_monotone() {
12947        let mut e = Engine::new();
12948        e.execute("CREATE SUBSCRIPTION s CONNECTION 'h=x' PUBLICATION p")
12949            .unwrap();
12950        assert!(e.subscription_advance("s", 100));
12951        assert_eq!(e.subscriptions().get("s").unwrap().last_received_pos, 100);
12952        assert!(e.subscription_advance("s", 50)); // stale → ignored
12953        assert_eq!(e.subscriptions().get("s").unwrap().last_received_pos, 100);
12954        assert!(e.subscription_advance("s", 200));
12955        assert_eq!(e.subscriptions().get("s").unwrap().last_received_pos, 200);
12956        assert!(!e.subscription_advance("missing", 1));
12957    }
12958
12959    #[test]
12960    fn show_subscriptions_returns_rows_ordered_by_name() {
12961        let mut e = Engine::new();
12962        e.execute("CREATE SUBSCRIPTION z_sub CONNECTION 'h=x' PUBLICATION p1, p2")
12963            .unwrap();
12964        e.execute("CREATE SUBSCRIPTION a_sub CONNECTION 'h=y' PUBLICATION p3")
12965            .unwrap();
12966        let r = e.execute_readonly("SHOW SUBSCRIPTIONS").unwrap();
12967        let QueryResult::Rows { rows, columns } = r else {
12968            panic!()
12969        };
12970        assert_eq!(rows.len(), 2);
12971        assert_eq!(columns.len(), 5);
12972        assert_eq!(columns[0].name, "name");
12973        assert_eq!(columns[4].name, "last_received_pos");
12974        // Alphabetical: a_sub, z_sub.
12975        let names: Vec<&str> = rows
12976            .iter()
12977            .map(|r| {
12978                if let Value::Text(s) = &r.values[0] {
12979                    s.as_str()
12980                } else {
12981                    panic!()
12982                }
12983            })
12984            .collect();
12985        assert_eq!(names, alloc::vec!["a_sub", "z_sub"]);
12986        // Row 0: a_sub
12987        assert_eq!(rows[0].values[1], Value::Text("h=y".to_string()));
12988        assert_eq!(rows[0].values[2], Value::Text("p3".to_string()));
12989        assert_eq!(rows[0].values[3], Value::Bool(true));
12990        assert_eq!(rows[0].values[4], Value::BigInt(0));
12991        // Row 1: z_sub — publications join with ", "
12992        assert_eq!(rows[1].values[2], Value::Text("p1, p2".to_string()));
12993    }
12994
12995    #[test]
12996    fn subscriptions_persist_across_snapshot_envelope_v4() {
12997        let mut e = Engine::new();
12998        e.execute("CREATE SUBSCRIPTION s1 CONNECTION 'h=A' PUBLICATION p1, p2")
12999            .unwrap();
13000        e.execute("CREATE SUBSCRIPTION s2 CONNECTION 'h=B' PUBLICATION p3")
13001            .unwrap();
13002        e.subscription_advance("s2", 42);
13003        let snap = e.snapshot();
13004        let e2 = Engine::restore_envelope(&snap).unwrap();
13005        assert_eq!(e2.subscriptions().len(), 2);
13006        let s1 = e2.subscriptions().get("s1").unwrap();
13007        assert_eq!(s1.conn_str, "h=A");
13008        assert_eq!(
13009            s1.publications,
13010            alloc::vec!["p1".to_string(), "p2".to_string()]
13011        );
13012        assert_eq!(s1.last_received_pos, 0);
13013        let s2 = e2.subscriptions().get("s2").unwrap();
13014        assert_eq!(s2.last_received_pos, 42);
13015    }
13016
13017    #[test]
13018    fn v3_envelope_loads_with_empty_subscriptions() {
13019        // v3 snapshot (publications-only). Forge it by hand so we
13020        // verify v6.1.4 readers don't panic — they must surface
13021        // empty subscriptions and a populated publication table.
13022        let mut e = Engine::new();
13023        e.execute("CREATE PUBLICATION pub_legacy").unwrap();
13024        let catalog = e.catalog.serialize();
13025        let users = crate::users::serialize_users(&e.users);
13026        let pubs = e.publications.serialize();
13027        let mut buf = Vec::new();
13028        buf.extend_from_slice(b"SPGENV01");
13029        buf.push(3u8); // v3
13030        buf.extend_from_slice(&u32::try_from(catalog.len()).unwrap().to_le_bytes());
13031        buf.extend_from_slice(&catalog);
13032        buf.extend_from_slice(&u32::try_from(users.len()).unwrap().to_le_bytes());
13033        buf.extend_from_slice(&users);
13034        buf.extend_from_slice(&u32::try_from(pubs.len()).unwrap().to_le_bytes());
13035        buf.extend_from_slice(&pubs);
13036        let crc = spg_crypto::crc32::crc32(&buf);
13037        buf.extend_from_slice(&crc.to_le_bytes());
13038
13039        let e2 = Engine::restore_envelope(&buf).expect("v3 envelope restores under v4 reader");
13040        assert!(e2.subscriptions().is_empty());
13041        assert!(e2.publications().contains("pub_legacy"));
13042    }
13043
13044    #[test]
13045    fn create_subscription_allowed_inside_transaction() {
13046        let mut e = Engine::new();
13047        e.execute("BEGIN").unwrap();
13048        e.execute("CREATE SUBSCRIPTION s CONNECTION 'h=x' PUBLICATION p")
13049            .unwrap();
13050        e.execute("COMMIT").unwrap();
13051        assert!(e.subscriptions().contains("s"));
13052    }
13053
13054    // ── v6.2.0: ANALYZE + spg_statistic + envelope v5 ──────────
13055    #[test]
13056    fn analyze_populates_histogram_bounds() {
13057        let mut e = Engine::new();
13058        e.execute("CREATE TABLE t (id INT NOT NULL, name TEXT)")
13059            .unwrap();
13060        for i in 0..50 {
13061            e.execute(&alloc::format!("INSERT INTO t VALUES ({i}, 'name{i}')"))
13062                .unwrap();
13063        }
13064        e.execute("ANALYZE t").unwrap();
13065        let stats = e.statistics();
13066        let id_stats = stats.get("t", "id").unwrap();
13067        assert!(id_stats.histogram_bounds.len() >= 2);
13068        assert_eq!(id_stats.histogram_bounds.first().unwrap(), "0");
13069        assert_eq!(id_stats.histogram_bounds.last().unwrap(), "49");
13070        assert!((id_stats.null_frac - 0.0).abs() < 1e-6);
13071        assert_eq!(id_stats.n_distinct, 50);
13072    }
13073
13074    #[test]
13075    fn reanalyze_overwrites_prior_stats() {
13076        let mut e = Engine::new();
13077        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
13078        for i in 0..10 {
13079            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
13080                .unwrap();
13081        }
13082        e.execute("ANALYZE t").unwrap();
13083        let n1 = e.statistics().get("t", "id").unwrap().n_distinct;
13084        assert_eq!(n1, 10);
13085        for i in 10..30 {
13086            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
13087                .unwrap();
13088        }
13089        e.execute("ANALYZE t").unwrap();
13090        let n2 = e.statistics().get("t", "id").unwrap().n_distinct;
13091        assert_eq!(n2, 30);
13092    }
13093
13094    #[test]
13095    fn analyze_unknown_table_errors() {
13096        let mut e = Engine::new();
13097        let err = e.execute("ANALYZE nonexistent").unwrap_err();
13098        assert!(matches!(
13099            err,
13100            EngineError::Storage(StorageError::TableNotFound { .. })
13101        ));
13102    }
13103
13104    #[test]
13105    fn bare_analyze_covers_all_user_tables() {
13106        let mut e = Engine::new();
13107        e.execute("CREATE TABLE t1 (id INT NOT NULL)").unwrap();
13108        e.execute("CREATE TABLE t2 (name TEXT NOT NULL)").unwrap();
13109        e.execute("INSERT INTO t1 VALUES (1)").unwrap();
13110        e.execute("INSERT INTO t2 VALUES ('alice')").unwrap();
13111        let r = e.execute("ANALYZE").unwrap();
13112        match r {
13113            QueryResult::CommandOk {
13114                affected,
13115                modified_catalog,
13116            } => {
13117                assert_eq!(affected, 2);
13118                assert!(modified_catalog);
13119            }
13120            other => panic!("expected CommandOk, got {other:?}"),
13121        }
13122        assert!(e.statistics().get("t1", "id").is_some());
13123        assert!(e.statistics().get("t2", "name").is_some());
13124    }
13125
13126    #[test]
13127    fn select_from_spg_statistic_returns_rows_per_column() {
13128        let mut e = Engine::new();
13129        e.execute("CREATE TABLE t (id INT NOT NULL, label TEXT)")
13130            .unwrap();
13131        e.execute("INSERT INTO t VALUES (1, 'a')").unwrap();
13132        e.execute("INSERT INTO t VALUES (2, 'b')").unwrap();
13133        e.execute("ANALYZE t").unwrap();
13134        let r = e.execute_readonly("SELECT * FROM spg_statistic").unwrap();
13135        let QueryResult::Rows { rows, columns } = r else {
13136            panic!()
13137        };
13138        // v6.7.0 — spg_statistic gained a `cold_row_count` column.
13139        assert_eq!(columns.len(), 6);
13140        assert_eq!(columns[0].name, "table_name");
13141        assert_eq!(columns[4].name, "histogram_bounds");
13142        assert_eq!(columns[5].name, "cold_row_count");
13143        assert_eq!(rows.len(), 2, "one row per column of t");
13144        // Sorted by (table_name, column_name).
13145        match (&rows[0].values[0], &rows[0].values[1]) {
13146            (Value::Text(t), Value::Text(c)) => {
13147                assert_eq!(t, "t");
13148                // BTreeMap orders (table, column); columns "id" < "label".
13149                assert_eq!(c, "id");
13150            }
13151            _ => panic!(),
13152        }
13153    }
13154
13155    #[test]
13156    fn analyze_skips_vector_columns() {
13157        // Vector columns have their own stats shape (HNSW graph);
13158        // ANALYZE leaves them out of spg_statistic.
13159        let mut e = Engine::new();
13160        e.execute("CREATE TABLE t (id INT NOT NULL, v VECTOR(3) NOT NULL)")
13161            .unwrap();
13162        e.execute("INSERT INTO t VALUES (1, [1, 2, 3])").unwrap();
13163        e.execute("ANALYZE t").unwrap();
13164        assert!(e.statistics().get("t", "id").is_some());
13165        assert!(e.statistics().get("t", "v").is_none());
13166    }
13167
13168    #[test]
13169    fn statistics_persist_across_envelope_v5_round_trip() {
13170        let mut e = Engine::new();
13171        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
13172        for i in 0..20 {
13173            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
13174                .unwrap();
13175        }
13176        e.execute("ANALYZE").unwrap();
13177        let snap = e.snapshot();
13178        let e2 = Engine::restore_envelope(&snap).unwrap();
13179        let s = e2.statistics().get("t", "id").unwrap();
13180        assert_eq!(s.n_distinct, 20);
13181    }
13182
13183    // ── v6.2.1 auto-analyze threshold ───────────────────────────
13184
13185    #[test]
13186    fn auto_analyze_threshold_fires_after_10pct_of_min_rows_on_small_table() {
13187        // For a table with 0 rows then 10 inserts → modified=10,
13188        // row_count=10. Threshold = 0.1 × max(10, 100) = 10. So
13189        // after the 10th INSERT the threshold is met.
13190        let mut e = Engine::new();
13191        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
13192        for i in 0..9 {
13193            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
13194                .unwrap();
13195        }
13196        assert!(e.tables_needing_analyze().is_empty(), "9 < threshold");
13197        e.execute("INSERT INTO t VALUES (9)").unwrap();
13198        let needs = e.tables_needing_analyze();
13199        assert_eq!(needs, alloc::vec!["t".to_string()]);
13200    }
13201
13202    #[test]
13203    fn auto_analyze_threshold_uses_10pct_of_row_count_for_large_tables() {
13204        // After ANALYZE on 1000 rows, threshold = 0.1 × row_count.
13205        // Each new INSERT bumps both modified and row_count, so to
13206        // trigger from N=1000 we need modifications ≥ 0.1 × (1000+M),
13207        // i.e. M ≥ 112. The test inserts 50 (no fire), then 150
13208        // more (200 total mods, row_count=1200, threshold=120 → fire).
13209        let mut e = Engine::new();
13210        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
13211        for i in 0..1000 {
13212            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
13213                .unwrap();
13214        }
13215        e.execute("ANALYZE t").unwrap();
13216        assert!(e.tables_needing_analyze().is_empty(), "fresh ANALYZE");
13217        for i in 1000..1050 {
13218            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
13219                .unwrap();
13220        }
13221        assert!(
13222            e.tables_needing_analyze().is_empty(),
13223            "50 inserts < threshold of ~105"
13224        );
13225        for i in 1050..1200 {
13226            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
13227                .unwrap();
13228        }
13229        assert_eq!(
13230            e.tables_needing_analyze(),
13231            alloc::vec!["t".to_string()],
13232            "200 inserts > 0.1 × 1200 threshold"
13233        );
13234    }
13235
13236    #[test]
13237    fn auto_analyze_threshold_resets_after_analyze() {
13238        let mut e = Engine::new();
13239        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
13240        for i in 0..200 {
13241            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
13242                .unwrap();
13243        }
13244        assert!(!e.tables_needing_analyze().is_empty());
13245        e.execute("ANALYZE").unwrap();
13246        assert!(
13247            e.tables_needing_analyze().is_empty(),
13248            "ANALYZE must reset the counter"
13249        );
13250    }
13251
13252    #[test]
13253    fn auto_analyze_threshold_tracks_updates_and_deletes() {
13254        let mut e = Engine::new();
13255        e.execute("CREATE TABLE t (id INT NOT NULL, label TEXT)")
13256            .unwrap();
13257        for i in 0..50 {
13258            e.execute(&alloc::format!("INSERT INTO t VALUES ({i}, 'x')"))
13259                .unwrap();
13260        }
13261        e.execute("ANALYZE t").unwrap();
13262        // UPDATE 20 rows + DELETE 5 → modified=25. Threshold = 0.1
13263        // × max(50, 100) = 10. So 25 >= 10 → trigger.
13264        e.execute("UPDATE t SET label = 'y' WHERE id < 20").unwrap();
13265        e.execute("DELETE FROM t WHERE id >= 45").unwrap();
13266        assert_eq!(e.tables_needing_analyze(), alloc::vec!["t".to_string()]);
13267    }
13268
13269    #[test]
13270    fn v4_envelope_loads_with_empty_statistics() {
13271        // Forge a v4 envelope by hand: catalog + users + pubs +
13272        // subs trailer, no statistics. A v6.2.0 reader must accept
13273        // it and surface an empty Statistics.
13274        let mut e = Engine::new();
13275        e.create_user("alice", "secret", crate::users::Role::ReadOnly, [0u8; 16])
13276            .unwrap();
13277        let catalog = e.catalog.serialize();
13278        let users = crate::users::serialize_users(&e.users);
13279        let pubs = e.publications.serialize();
13280        let subs = e.subscriptions.serialize();
13281        let mut buf = Vec::new();
13282        buf.extend_from_slice(b"SPGENV01");
13283        buf.push(4u8);
13284        buf.extend_from_slice(&u32::try_from(catalog.len()).unwrap().to_le_bytes());
13285        buf.extend_from_slice(&catalog);
13286        buf.extend_from_slice(&u32::try_from(users.len()).unwrap().to_le_bytes());
13287        buf.extend_from_slice(&users);
13288        buf.extend_from_slice(&u32::try_from(pubs.len()).unwrap().to_le_bytes());
13289        buf.extend_from_slice(&pubs);
13290        buf.extend_from_slice(&u32::try_from(subs.len()).unwrap().to_le_bytes());
13291        buf.extend_from_slice(&subs);
13292        let crc = spg_crypto::crc32::crc32(&buf);
13293        buf.extend_from_slice(&crc.to_le_bytes());
13294        let e2 = Engine::restore_envelope(&buf).expect("v4 envelope restores");
13295        assert!(e2.statistics().is_empty());
13296    }
13297
13298    #[test]
13299    fn v1_v2_envelope_loads_with_empty_publications() {
13300        // A snapshot taken before v6.1.2 (no publication trailer,
13301        // envelope v2) must still deserialise — and the resulting
13302        // engine must report zero publications. Use the engine's own
13303        // round-trip with no publications: that emits v3 but with an
13304        // empty pubs block. Then forge a v2 envelope by hand to lock
13305        // the back-compat path.
13306        let mut e = Engine::new();
13307        // Force users to be non-empty so the snapshot takes the
13308        // envelope path rather than the bare-catalog fallback.
13309        e.create_user("alice", "secret", crate::users::Role::ReadOnly, [0u8; 16])
13310            .unwrap();
13311
13312        // Forge an envelope v2: same shape as v3 but no pubs trailer.
13313        let catalog = e.catalog.serialize();
13314        let users = crate::users::serialize_users(&e.users);
13315        let mut buf = Vec::new();
13316        buf.extend_from_slice(b"SPGENV01");
13317        buf.push(2u8); // v2
13318        buf.extend_from_slice(&u32::try_from(catalog.len()).unwrap().to_le_bytes());
13319        buf.extend_from_slice(&catalog);
13320        buf.extend_from_slice(&u32::try_from(users.len()).unwrap().to_le_bytes());
13321        buf.extend_from_slice(&users);
13322        let crc = spg_crypto::crc32::crc32(&buf);
13323        buf.extend_from_slice(&crc.to_le_bytes());
13324
13325        let e2 = Engine::restore_envelope(&buf).expect("v2 envelope restores");
13326        assert!(e2.publications().is_empty());
13327    }
13328}