Skip to main content

spg_engine/
lib.rs

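//! SPG execution engine — wires the SQL front-end to the in-memory
//! storage layer.
//!
//! Historical note: this header dates from v0.3, which implemented only
//! `CREATE TABLE`, single-row `INSERT VALUES`, and `SELECT * FROM <table>`
//! (no WHERE — that was scheduled for v0.4 alongside expression evaluation
//! against rows). The crate has grown well beyond that since; see the
//! per-item `vX.Y` notes below for what each later release added.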
5#![no_std]
6
7extern crate alloc;
8
9pub mod aggregate;
10pub mod describe;
11pub mod eval;
12pub mod fts;
13pub mod json;
14pub mod memoize;
15pub mod plan_cache;
16pub mod publications;
17pub mod query_stats;
18pub mod reorder;
19pub mod selectivity;
20pub mod statistics;
21pub mod subscriptions;
22pub mod triggers;
23pub mod users;
24
25pub use crate::users::{Role, ScramSecrets, UserError, UserStore};
26
27use alloc::borrow::Cow;
28use alloc::boxed::Box;
29use alloc::collections::BTreeMap;
30use alloc::string::{String, ToString};
31use alloc::vec::Vec;
32use core::fmt;
33
34use spg_sql::ast::{
35    BinOp, ColumnDef, ColumnName, ColumnTypeName, CreateIndexStatement, CreatePublicationStatement,
36    CreateSubscriptionStatement, CreateTableStatement, CreateUserStatement, Expr, FrameBound,
37    FrameKind, FromClause, IndexMethod, InsertStatement, JoinKind, Literal, OrderBy, SelectItem,
38    SelectStatement, Statement, TableRef, UnOp, UnionKind, VecEncoding as SqlVecEncoding,
39    WindowFrame,
40};
41use spg_sql::parser::{self, ParseError};
42use spg_storage::{
43    Catalog, ColumnSchema, CompactReport, DataType, IndexKey, IndexKind, Row, StorageError, Table,
44    TableSchema, Value, VecEncoding,
45};
46
47use crate::eval::{EvalContext, EvalError};
48
/// Result of executing one statement.
///
/// Marked `#[non_exhaustive]`, so a `match` outside this crate needs a
/// wildcard arm.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub enum QueryResult {
    /// DDL or DML succeeded.
    ///
    /// `affected` is the row count for `INSERT` and 0 elsewhere.
    /// `modified_catalog` tells the server whether this statement
    /// caused the *committed* catalog to change — it's the signal to
    /// snapshot/audit. False for `BEGIN`/`ROLLBACK`, false for writeful
    /// statements executed inside a transaction (those only touch the
    /// shadow), and true for `COMMIT` and for writes outside a TX.
    CommandOk {
        /// Number of rows the statement affected (see the variant docs).
        affected: usize,
        /// Whether the committed catalog changed (see the variant docs).
        modified_catalog: bool,
    },
    /// `SELECT` returned a (possibly empty) row set.
    Rows {
        /// Schema of the result columns.
        columns: Vec<ColumnSchema>,
        /// The result rows; may be empty.
        rows: Vec<Row>,
    },
}
71
/// All errors the engine can return.
///
/// Marked `#[non_exhaustive]` from v7.5.0 onward: external `match`
/// must include a `_` arm so new variants in subsequent v7.x releases
/// are not breaking changes.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub enum EngineError {
    /// Error reported by the SQL parser (`spg_sql::parser`).
    Parse(ParseError),
    /// Error reported by the storage layer (`spg_storage`).
    Storage(StorageError),
    /// Error reported by expression evaluation (`crate::eval`).
    Eval(EvalError),
    /// Front-end accepted a construct that the v0.x executor doesn't support.
    Unsupported(String),
    /// `BEGIN` while another transaction is already open.
    TransactionAlreadyOpen,
    /// `COMMIT` / `ROLLBACK` with no active transaction.
    NoActiveTransaction,
    /// v4.0 sentinel: `execute_readonly` got a statement that
    /// mutates engine state (INSERT / CREATE / BEGIN / COMMIT / …).
    /// The caller should retake the write lock and dispatch through
    /// `execute(&mut self)` instead.
    WriteRequired,
    /// v4.2: a SELECT would have returned more rows than the
    /// configured `max_query_rows` cap. Carries the cap.
    RowLimitExceeded(usize),
    /// v4.5: cooperative cancellation — the host (server's
    /// per-query watchdog) set the cancel flag while a long-running
    /// SELECT / UPDATE / DELETE was scanning rows. The partial work
    /// is discarded; the caller should surface this as a timeout
    /// to the client.
    Cancelled,
}
104
105impl fmt::Display for EngineError {
106    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
107        match self {
108            Self::Parse(e) => write!(f, "parse: {e}"),
109            Self::Storage(e) => write!(f, "storage: {e}"),
110            Self::Eval(e) => write!(f, "eval: {e}"),
111            Self::Unsupported(s) => write!(f, "unsupported: {s}"),
112            Self::TransactionAlreadyOpen => f.write_str("a transaction is already open"),
113            Self::NoActiveTransaction => f.write_str("no active transaction"),
114            Self::WriteRequired => {
115                f.write_str("statement requires a write lock (use execute, not execute_readonly)")
116            }
117            Self::RowLimitExceeded(n) => {
118                write!(f, "query exceeded max_query_rows={n}")
119            }
120            Self::Cancelled => f.write_str("query cancelled (timeout or client request)"),
121        }
122    }
123}
124
/// Wraps a parser error so `?` can lift it into [`EngineError::Parse`].
impl From<ParseError> for EngineError {
    fn from(e: ParseError) -> Self {
        Self::Parse(e)
    }
}
/// Wraps a storage error so `?` can lift it into [`EngineError::Storage`].
impl From<StorageError> for EngineError {
    fn from(e: StorageError) -> Self {
        Self::Storage(e)
    }
}
/// Wraps an evaluation error so `?` can lift it into [`EngineError::Eval`].
impl From<EvalError> for EngineError {
    fn from(e: EvalError) -> Self {
        Self::Eval(e)
    }
}
140
// NOTE: the paragraph below describes `Engine` (declared further down in
// this file), not `ClockFn`. It is kept here as a plain comment so rustdoc
// does not attach it to the type alias that follows.
//
// The execution engine. Holds the catalog and (later) other server-scope
// state. `Engine::new()` is intentionally cheap so callers can construct one
// per database, per test.

/// Function pointer that returns "now" as microseconds since Unix
/// epoch. The engine is `no_std`, so it can't reach for `std::time`
/// itself — callers (`spg-server`, the sqllogictest runner) inject a
/// concrete implementation. When the engine's `clock` field is `None`,
/// `NOW()` / `CURRENT_*` raise `Unsupported`.
pub type ClockFn = fn() -> i64;

/// Function pointer that produces 16 cryptographically random bytes.
/// Like `ClockFn`, the engine is `no_std` and can't reach for /dev/urandom
/// itself — host (`spg-server`) injects an OS-backed source. When the
/// engine's `salt_fn` field is `None`, SQL-driven `CREATE USER` falls back
/// to a deterministic salt derived from the username (acceptable in tests;
/// the server always installs a real RNG so production paths never see
/// this).
pub type SaltFn = fn() -> [u8; 16];
158
159/// v4.5 cooperative cancellation token. A long-running SELECT /
160/// UPDATE / DELETE checks `is_cancelled` at row-loop checkpoints
161/// and bails with `EngineError::Cancelled`. The host
162/// (`spg-server`) creates an `AtomicBool` per query, spawns a
163/// watchdog thread that sets it after `SPG_QUERY_TIMEOUT_MS`,
164/// and passes it via `execute_with_cancel` / `execute_readonly_with_cancel`.
165///
166/// `CancelToken::none()` is a no-op — used by the legacy `execute`
167/// and `execute_readonly` entry points so existing callers don't
168/// change.
169#[derive(Debug, Clone, Copy)]
170pub struct CancelToken<'a> {
171    flag: Option<&'a core::sync::atomic::AtomicBool>,
172}
173
174impl<'a> CancelToken<'a> {
175    #[must_use]
176    pub const fn none() -> Self {
177        Self { flag: None }
178    }
179
180    #[must_use]
181    pub const fn from_flag(f: &'a core::sync::atomic::AtomicBool) -> Self {
182        Self { flag: Some(f) }
183    }
184
185    #[must_use]
186    pub fn is_cancelled(self) -> bool {
187        self.flag
188            .is_some_and(|f| f.load(core::sync::atomic::Ordering::Relaxed))
189    }
190
191    /// Returns `Err(Cancelled)` if the token has been tripped.
192    /// Used at row-loop checkpoints to bail cooperatively without
193    /// scattering raw `is_cancelled` checks across the executor.
194    #[inline]
195    pub fn check(self) -> Result<(), EngineError> {
196        if self.is_cancelled() {
197            Err(EngineError::Cancelled)
198        } else {
199            Ok(())
200        }
201    }
202}
203
204// ---- snapshot envelope (v4.1, extended with CRC32 in v4.37,  ----
205// ----   publications in v6.1.2 v3, subscriptions in v6.1.4 v4) ----
206//
207// Wraps a catalog blob + a user blob behind a small header so the
208// server can persist both atomically without inventing a new file.
209// Bare catalog blobs (v3.x) still load via `restore_envelope` since
210// the magic check fails fast and the function falls back to
211// `Catalog::deserialize`.
212//
213// Layout — v1 (v4.1, no CRC):
214//   [8 bytes magic "SPGENV01"]
215//   [u8 version = 1]
216//   [u32 catalog_len][catalog bytes]
217//   [u32 users_len][users bytes]
218//
219// Layout — v2 (v4.37, CRC32 of body):
220//   [8 bytes magic "SPGENV01"]
221//   [u8 version = 2]
222//   [u32 catalog_len][catalog bytes]
223//   [u32 users_len][users bytes]
224//   [u32 crc32]                      ← CRC32 of every byte before it.
225//
226// Layout — v3 (v6.1.2, publications trailer):
227//   [8 bytes magic "SPGENV01"]
228//   [u8 version = 3]
229//   [u32 catalog_len][catalog bytes]
230//   [u32 users_len][users bytes]
231//   [u32 pubs_len][publications bytes]
232//   [u32 crc32]
233//
234// Layout — v4 (v6.1.4, subscriptions trailer):
235//   [8 bytes magic "SPGENV01"]
236//   [u8 version = 4]
237//   [u32 catalog_len][catalog bytes]
238//   [u32 users_len][users bytes]
239//   [u32 pubs_len][publications bytes]
240//   [u32 subs_len][subscriptions bytes]
241//   [u32 crc32]
242//
243// Layout — v5 (v6.2.0, statistics trailer):
244//   [8 bytes magic "SPGENV01"]
245//   [u8 version = 5]
246//   [u32 catalog_len][catalog bytes]
247//   [u32 users_len][users bytes]
248//   [u32 pubs_len][publications bytes]
249//   [u32 subs_len][subscriptions bytes]
250//   [u32 stats_len][statistics bytes]      ← NEW
251//   [u32 crc32]
252//
253// Writers emit v5 from v6.2.0 on. Readers accept all of {v1, v2,
254// v3, v4, v5}: v1/v2 load with empty publications / subscriptions /
255// statistics; v3 loads with empty subscriptions + statistics; v4
256// loads with empty statistics; v5 deserialises all three. Older
257// SPG versions reading a v5 envelope fall through the version
258// match to `EnvelopeParse::Bare` — pre-v6.2.0 binaries cannot
259// open v6.2.0+ snapshots (matches the v6.1.2 / v6.1.4 breaks).
260
/// Eight-byte magic that opens every snapshot envelope.
const ENVELOPE_MAGIC: &[u8; 8] = b"SPGENV01";
/// v1 layout (v4.1): catalog + users, no CRC.
const ENVELOPE_VERSION_V1: u8 = 1;
/// v2 layout (v4.37): v1 plus a trailing CRC32 of the body.
const ENVELOPE_VERSION_V2: u8 = 2;
/// v3 layout (v6.1.2): v2 plus a publications section.
const ENVELOPE_VERSION_V3: u8 = 3;
/// v4 layout (v6.1.4): v3 plus a subscriptions section.
const ENVELOPE_VERSION_V4: u8 = 4;
/// v5 layout (v6.2.0): v4 plus a statistics section; the version
/// `build_envelope` writes.
const ENVELOPE_VERSION_V5: u8 = 5;
267
268fn build_envelope(catalog: &[u8], users: &[u8], pubs: &[u8], subs: &[u8], stats: &[u8]) -> Vec<u8> {
269    let mut out = Vec::with_capacity(
270        8 + 1
271            + 4
272            + catalog.len()
273            + 4
274            + users.len()
275            + 4
276            + pubs.len()
277            + 4
278            + subs.len()
279            + 4
280            + stats.len()
281            + 4,
282    );
283    out.extend_from_slice(ENVELOPE_MAGIC);
284    out.push(ENVELOPE_VERSION_V5);
285    out.extend_from_slice(
286        &u32::try_from(catalog.len())
287            .expect("≤ 4G catalog")
288            .to_le_bytes(),
289    );
290    out.extend_from_slice(catalog);
291    out.extend_from_slice(
292        &u32::try_from(users.len())
293            .expect("≤ 4G users")
294            .to_le_bytes(),
295    );
296    out.extend_from_slice(users);
297    out.extend_from_slice(
298        &u32::try_from(pubs.len())
299            .expect("≤ 4G publications")
300            .to_le_bytes(),
301    );
302    out.extend_from_slice(pubs);
303    out.extend_from_slice(
304        &u32::try_from(subs.len())
305            .expect("≤ 4G subscriptions")
306            .to_le_bytes(),
307    );
308    out.extend_from_slice(subs);
309    out.extend_from_slice(
310        &u32::try_from(stats.len())
311            .expect("≤ 4G statistics")
312            .to_le_bytes(),
313    );
314    out.extend_from_slice(stats);
315    let crc = spg_crypto::crc32::crc32(&out);
316    out.extend_from_slice(&crc.to_le_bytes());
317    out
318}
319
/// Outcome of envelope parsing: either bare-catalog fallback, the
/// successfully split sections of a v1–v5 envelope, or an explicit
/// corruption error from a CRC mismatch (v2 and later carry a CRC).
/// `Bare` (catalog-only fallback) preserves v3.x readability.
///
/// In `Pair`, the optional sections are `None` when the envelope version
/// predates them: `publications` is present from v3, `subscriptions` from
/// v4, and `statistics` from v5.
enum EnvelopeParse<'a> {
    /// The buffer is not a well-formed envelope; treat it as a bare catalog.
    Bare,
    /// A well-formed envelope, split into borrowed section slices.
    Pair {
        catalog: &'a [u8],
        users: &'a [u8],
        publications: Option<&'a [u8]>,
        subscriptions: Option<&'a [u8]>,
        statistics: Option<&'a [u8]>,
    },
    /// The trailing CRC32 did not match the CRC32 computed over the body.
    CrcMismatch {
        /// CRC32 stored in the envelope trailer.
        expected: u32,
        /// CRC32 computed over the bytes preceding the trailer.
        computed: u32,
    },
}
340
341/// Returns `EnvelopeParse::Pair` for a valid v1 / v2 / v3 envelope,
342/// `Bare` for a buffer that doesn't look like an envelope (v3.x
343/// bare catalog fallback), and `CrcMismatch` for a v2/v3 envelope
344/// whose trailing CRC32 doesn't match the body.
345fn split_envelope(buf: &[u8]) -> EnvelopeParse<'_> {
346    if buf.len() < 8 + 1 + 4 || &buf[..8] != ENVELOPE_MAGIC {
347        return EnvelopeParse::Bare;
348    }
349    let version = buf[8];
350    if !matches!(
351        version,
352        ENVELOPE_VERSION_V1
353            | ENVELOPE_VERSION_V2
354            | ENVELOPE_VERSION_V3
355            | ENVELOPE_VERSION_V4
356            | ENVELOPE_VERSION_V5
357    ) {
358        return EnvelopeParse::Bare;
359    }
360    let mut p = 9usize;
361    let Some(cat_len_bytes) = buf.get(p..p + 4) else {
362        return EnvelopeParse::Bare;
363    };
364    let Ok(cat_len_arr) = cat_len_bytes.try_into() else {
365        return EnvelopeParse::Bare;
366    };
367    let cat_len = u32::from_le_bytes(cat_len_arr) as usize;
368    p += 4;
369    if p + cat_len + 4 > buf.len() {
370        return EnvelopeParse::Bare;
371    }
372    let catalog = &buf[p..p + cat_len];
373    p += cat_len;
374    let Some(user_len_bytes) = buf.get(p..p + 4) else {
375        return EnvelopeParse::Bare;
376    };
377    let Ok(user_len_arr) = user_len_bytes.try_into() else {
378        return EnvelopeParse::Bare;
379    };
380    let user_len = u32::from_le_bytes(user_len_arr) as usize;
381    p += 4;
382    if p + user_len > buf.len() {
383        return EnvelopeParse::Bare;
384    }
385    let users = &buf[p..p + user_len];
386    p += user_len;
387    let publications = if matches!(
388        version,
389        ENVELOPE_VERSION_V3 | ENVELOPE_VERSION_V4 | ENVELOPE_VERSION_V5
390    ) {
391        // [u32 pubs_len][publications bytes]
392        let Some(pubs_len_bytes) = buf.get(p..p + 4) else {
393            return EnvelopeParse::Bare;
394        };
395        let Ok(pubs_len_arr) = pubs_len_bytes.try_into() else {
396            return EnvelopeParse::Bare;
397        };
398        let pubs_len = u32::from_le_bytes(pubs_len_arr) as usize;
399        p += 4;
400        if p + pubs_len > buf.len() {
401            return EnvelopeParse::Bare;
402        }
403        let pubs_slice = &buf[p..p + pubs_len];
404        p += pubs_len;
405        Some(pubs_slice)
406    } else {
407        None
408    };
409    let subscriptions = if matches!(version, ENVELOPE_VERSION_V4 | ENVELOPE_VERSION_V5) {
410        // [u32 subs_len][subscriptions bytes]
411        let Some(subs_len_bytes) = buf.get(p..p + 4) else {
412            return EnvelopeParse::Bare;
413        };
414        let Ok(subs_len_arr) = subs_len_bytes.try_into() else {
415            return EnvelopeParse::Bare;
416        };
417        let subs_len = u32::from_le_bytes(subs_len_arr) as usize;
418        p += 4;
419        if p + subs_len > buf.len() {
420            return EnvelopeParse::Bare;
421        }
422        let subs_slice = &buf[p..p + subs_len];
423        p += subs_len;
424        Some(subs_slice)
425    } else {
426        None
427    };
428    let statistics = if version == ENVELOPE_VERSION_V5 {
429        // [u32 stats_len][statistics bytes]
430        let Some(stats_len_bytes) = buf.get(p..p + 4) else {
431            return EnvelopeParse::Bare;
432        };
433        let Ok(stats_len_arr) = stats_len_bytes.try_into() else {
434            return EnvelopeParse::Bare;
435        };
436        let stats_len = u32::from_le_bytes(stats_len_arr) as usize;
437        p += 4;
438        if p + stats_len > buf.len() {
439            return EnvelopeParse::Bare;
440        }
441        let stats_slice = &buf[p..p + stats_len];
442        p += stats_len;
443        Some(stats_slice)
444    } else {
445        None
446    };
447    if matches!(
448        version,
449        ENVELOPE_VERSION_V2 | ENVELOPE_VERSION_V3 | ENVELOPE_VERSION_V4 | ENVELOPE_VERSION_V5
450    ) {
451        if p + 4 != buf.len() {
452            return EnvelopeParse::Bare;
453        }
454        let Ok(crc_arr) = buf[p..p + 4].try_into() else {
455            return EnvelopeParse::Bare;
456        };
457        let expected = u32::from_le_bytes(crc_arr);
458        let computed = spg_crypto::crc32::crc32(&buf[..p]);
459        if expected != computed {
460            return EnvelopeParse::CrcMismatch { expected, computed };
461        }
462    } else if p != buf.len() {
463        // v1: must end exactly at the users section.
464        return EnvelopeParse::Bare;
465    }
466    EnvelopeParse::Pair {
467        catalog,
468        users,
469        publications,
470        subscriptions,
471        statistics,
472    }
473}
474
/// v4.41.1 opaque transaction handle. Returned by `Engine::alloc_tx_id`,
/// threaded through `Engine::execute_in` so dispatch can identify which
/// in-flight TX a statement belongs to. `IMPLICIT_TX` is the reserved
/// slot every legacy caller — engine self-tests, spg-cli, spg-embedded,
/// startup replay — implicitly uses through the unchanged
/// `Engine::execute(sql)` API. v4.41.1 keeps at most one active slot at
/// runtime (dispatch holds `engine.write()` across the wrap, same as
/// v4.34); the map shape is here to let v4.42 turn on N in-flight
/// implicit TXs without reshuffling the engine internals.
///
/// The `Ord` derive is what lets a `TxId` key the `BTreeMap` in
/// `Engine::tx_catalogs`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct TxId(pub u64);

/// Reserved slot used by `Engine::execute(sql)` — the legacy single-
/// global-shadow path. New `alloc_tx_id` handles start at 1.
pub const IMPLICIT_TX: TxId = TxId(0);
490
/// v6.7.3 — default segment-size threshold used by `COMPACT COLD
/// SEGMENTS` when no explicit target is supplied. Segments whose
/// `OwnedSegment::bytes().len()` is **strictly** less than this
/// value are eligible to merge. spg-server reads
/// `SPG_COMPACTION_TARGET_SEGMENT_BYTES` to override.
///
/// The default is 4 MiB (`4 * 1024 * 1024` bytes).
pub const COMPACTION_TARGET_DEFAULT_BYTES: u64 = 4 * 1024 * 1024;
497
/// Per-slot transaction state. Held inside `tx_catalogs[tx_id]` for the
/// lifetime of a BEGIN..COMMIT (or BEGIN..ROLLBACK) window. Drops when
/// the TX commits (its `catalog` is moved over `Engine.catalog`) or
/// rolls back (slot removed, catalog discarded).
#[derive(Debug, Default, Clone)]
struct TxState {
    /// The TX's shadow copy of the catalog. Started as a clone of
    /// `Engine.catalog` at BEGIN time; writes flow into it; COMMIT
    /// installs it over `Engine.catalog`. `Catalog::clone()` is O(1)
    /// since v4.40 (`PersistentVec` rows + `PersistentBTreeMap` indices).
    ///
    /// NOTE(review): the doc on `Engine::clone_snapshot` describes the same
    /// clone as O(log n) per table — reconcile the two statements.
    catalog: Catalog,
    /// Per-TX savepoint stack. Each entry pairs the savepoint name with
    /// a clone of `catalog` at the moment `SAVEPOINT <name>` fired.
    /// `ROLLBACK TO <name>` restores from the entry and pops everything
    /// after it; `RELEASE <name>` discards the entry and everything
    /// after; COMMIT/ROLLBACK clears the whole stack.
    savepoints: Vec<(String, Catalog)>,
}
516
/// v7.11.0 — frozen read-only view of the engine's committed state.
/// Constructed via [`Engine::clone_snapshot`]. Holds clones of the
/// catalog, statistics, clock function, and row-cap config — the
/// four fields the `execute_readonly` path actually reads. Cheap to
/// `Clone` (each clone shares the underlying `PersistentVec` row
/// storage; only the trie root pointers copy). Send + Sync so a
/// snapshot can be moved across `tokio::task::spawn_blocking`
/// boundaries without coordination.
///
/// NOTE(review): nothing in this part of the file asserts `Send + Sync`;
/// it holds only if `Catalog` and `statistics::Statistics` are both
/// `Send + Sync` — confirm, or add a compile-time assertion.
///
/// The contract: a snapshot reflects the engine's state at the
/// moment `clone_snapshot()` returned. Subsequent writes to the
/// engine are NOT visible. Callers who need fresher data take a
/// new snapshot.
#[derive(Debug, Clone)]
pub struct CatalogSnapshot {
    /// Clone of the engine's catalog at snapshot time.
    catalog: Catalog,
    /// Clone of the engine's optimizer statistics at snapshot time.
    statistics: statistics::Statistics,
    /// Copy of the engine's clock callback, if one was installed.
    clock: Option<ClockFn>,
    /// Copy of the engine's per-query row cap (`None` = unlimited).
    max_query_rows: Option<usize>,
}
537
538#[derive(Debug, Default)]
539pub struct Engine {
540    /// Committed catalog — what survives `Engine::snapshot()` and what
541    /// outside-TX `SELECT`s read.
542    catalog: Catalog,
543    /// Active TX slots, keyed by `TxId`. Empty when no TX is in flight.
544    /// v4.41.1 runtime invariant: at most one entry (single-writer
545    /// model unchanged). v4.42 will let dispatch hold multiple entries
546    /// concurrently for group commit + engine MVCC.
547    tx_catalogs: BTreeMap<TxId, TxState>,
548    /// Which slot the next exec_* call should mutate. Set by
549    /// `execute_in(sql, tx_id)` at the entry point; legacy `execute(sql)`
550    /// sets it to `IMPLICIT_TX`. None when no TX is in flight (read /
551    /// write goes straight against `catalog`).
552    current_tx: Option<TxId>,
553    /// Monotonic counter for `alloc_tx_id`. Starts at 1 — slot 0 is
554    /// reserved for `IMPLICIT_TX`.
555    next_tx_id: u64,
556    /// Optional wall clock used to satisfy `NOW()` / `CURRENT_TIMESTAMP`
557    /// / `CURRENT_DATE`. Set by the host environment.
558    clock: Option<ClockFn>,
559    /// v4.1 cryptographic RNG for per-user password salt. Set by the
560    /// host. `None` means SQL-driven `CREATE USER` uses a
561    /// deterministic fallback — see `SaltFn`.
562    salt_fn: Option<SaltFn>,
563    /// v4.2 per-query row cap. `None` = unlimited. When set, a
564    /// SELECT that materialises more than `n` rows returns
565    /// `EngineError::RowLimitExceeded`. Enforced before the result
566    /// is shaped into wire frames so a runaway scan can't blow the
567    /// server's heap.
568    max_query_rows: Option<usize>,
569    /// v4.1 RBAC user table. Empty means "no RBAC configured yet" —
570    /// the server decides what that means at the auth boundary
571    /// (open mode vs legacy single-password mode). User CRUD goes
572    /// through `create_user`/`drop_user`/`verify_user`; persistence
573    /// rides the snapshot envelope alongside the catalog.
574    users: UserStore,
575    /// v6.1.2 logical-replication publication catalog. Empty until
576    /// `CREATE PUBLICATION` runs. Persistence rides the v3 envelope
577    /// trailer (see `build_envelope`).
578    publications: publications::Publications,
579    /// v6.1.4 logical-replication subscription catalog. Empty until
580    /// `CREATE SUBSCRIPTION` runs. Persistence rides the v4 envelope
581    /// trailer.
582    subscriptions: subscriptions::Subscriptions,
583    /// v6.2.0 — per-column statistics for the cost-based optimizer.
584    /// Populated by `ANALYZE`; queried via `spg_statistic` virtual
585    /// table. Persistence rides the v5 envelope trailer.
586    statistics: statistics::Statistics,
587    /// v6.3.0 — engine-level plan cache. Caches the post-`prepare()`
588    /// `Statement` keyed on SQL text. In-memory only — does NOT ride
589    /// the snapshot envelope (rebuilt on demand after restart).
590    plan_cache: plan_cache::PlanCache,
591    /// v6.5.1 — per-distinct-SQL execution stats. In-memory only,
592    /// surfaced via `spg_stat_query` virtual table. Updated by the
593    /// `execute_*` paths after a successful execute.
594    query_stats: query_stats::QueryStats,
595    /// v6.5.2 — connection-state provider callback. spg-server
596    /// registers a function at startup that snapshots its
597    /// per-pgwire-connection registry into `ActivityRow`s; engine
598    /// reads through it on every `SELECT * FROM spg_stat_activity`.
599    /// `None` ⇒ no-data (returns empty rows; matches the no_std
600    /// embedded callers that don't run pgwire).
601    activity_provider: Option<ActivityProvider>,
602    /// v6.5.3 — audit-chain provider + verifier. Same pattern as
603    /// activity_provider: spg-server registers both at startup;
604    /// engine reads through on `SELECT * FROM spg_audit_chain` and
605    /// `SELECT * FROM spg_audit_verify`. `None` ⇒ no-data.
606    audit_chain_provider: Option<AuditChainProvider>,
607    audit_verifier: Option<AuditVerifier>,
608    /// v6.5.6 — slow-query log threshold in microseconds. When set,
609    /// every successful execute whose elapsed exceeds the threshold
610    /// gets fed to the registered slow-query log callback (so
611    /// spg-server can emit a structured log line). Default `None`
612    /// = no slow-query logging.
613    slow_query_threshold_us: Option<u64>,
614    slow_query_logger: Option<SlowQueryLogger>,
615    /// v7.12.1 — session parameters set via `SET <name> = <value>`.
616    /// Only `default_text_search_config` is consumed by the engine
617    /// today (the FTS function dispatcher reads it when
618    /// `to_tsvector(text)` is called without an explicit config).
619    /// All other names are accepted + recorded so PG-dump output
620    /// loads, but have no behavioural effect.
621    session_params: BTreeMap<String, String>,
622    /// v7.12.7 — depth counter for trigger-emitted embedded SQL.
623    /// Each time the engine executes a `DeferredEmbeddedStmt` it
624    /// increments this; the recursive `execute_stmt_with_cancel`
625    /// inside that path checks against [`MAX_TRIGGER_RECURSION`]
626    /// to bound runaway cascades (trigger A's UPDATE on table B
627    /// fires trigger B which UPDATEs table A which fires trigger
628    /// A again…). Reset to 0 once the original DML returns.
629    trigger_recursion_depth: u32,
630}
631
/// v7.12.7 — hard cap on nested trigger-emitted embedded SQL
/// fires. 16 deep is well past anything a normal trigger graph
/// uses while still preventing infinite-loop wedging. Compared
/// against `Engine::trigger_recursion_depth`.
const MAX_TRIGGER_RECURSION: u32 = 16;

/// v6.5.6 — callback signature for slow-query log emission. Called
/// with `(sql, elapsed_us)` once per successful execute that crosses
/// the threshold. Stored in `Engine::slow_query_logger`; the threshold
/// lives in `Engine::slow_query_threshold_us`.
pub type SlowQueryLogger = fn(&str, u64);
641
642/// v6.5.4 — synthesise a `CREATE TABLE` statement from catalog
643/// state. Round-trips through `Engine::execute` to recreate the
644/// same schema (sans data + indexes — indexes are emitted as a
645/// separate `CREATE INDEX` chain in `spg_database_ddl`).
646fn render_create_table(name: &str, columns: &[ColumnSchema]) -> String {
647    let mut out = alloc::format!("CREATE TABLE {name} (");
648    for (i, col) in columns.iter().enumerate() {
649        if i > 0 {
650            out.push_str(", ");
651        }
652        out.push_str(&col.name);
653        out.push(' ');
654        out.push_str(&render_data_type(col.ty));
655        if !col.nullable {
656            out.push_str(" NOT NULL");
657        }
658        if col.auto_increment {
659            out.push_str(" AUTO_INCREMENT");
660        }
661    }
662    out.push(')');
663    out
664}
665
666fn render_data_type(ty: DataType) -> String {
667    match ty {
668        DataType::SmallInt => "SMALLINT".into(),
669        DataType::Int => "INT".into(),
670        DataType::BigInt => "BIGINT".into(),
671        DataType::Float => "FLOAT".into(),
672        DataType::Text => "TEXT".into(),
673        DataType::Varchar(n) => alloc::format!("VARCHAR({n})"),
674        DataType::Char(n) => alloc::format!("CHAR({n})"),
675        DataType::Bool => "BOOL".into(),
676        DataType::Vector { dim, encoding } => match encoding {
677            spg_storage::VecEncoding::F32 => alloc::format!("VECTOR({dim})"),
678            spg_storage::VecEncoding::Sq8 => alloc::format!("VECTOR({dim}) USING SQ8"),
679            spg_storage::VecEncoding::F16 => alloc::format!("VECTOR({dim}) USING HALF"),
680        },
681        DataType::Numeric { precision, scale } => {
682            alloc::format!("NUMERIC({precision},{scale})")
683        }
684        DataType::Date => "DATE".into(),
685        DataType::Timestamp => "TIMESTAMP".into(),
686        DataType::Interval => "INTERVAL".into(),
687        DataType::Json => "JSON".into(),
688        DataType::Jsonb => "JSONB".into(),
689        DataType::Timestamptz => "TIMESTAMPTZ".into(),
690        DataType::Bytes => "BYTEA".into(),
691        DataType::TextArray => "TEXT[]".into(),
692        DataType::IntArray => "INT[]".into(),
693        DataType::BigIntArray => "BIGINT[]".into(),
694        DataType::TsVector => "TSVECTOR".into(),
695        DataType::TsQuery => "TSQUERY".into(),
696    }
697}
698
/// v6.5.2 — one row of `spg_stat_activity`. Engine-public so
/// spg-server can construct rows without re-exporting internal
/// dispatch types.
///
/// NOTE(review): the per-field meanings are defined by the provider in
/// spg-server, not in this file. The `_us` suffix presumably means
/// microseconds (matching `ClockFn`) — confirm against the provider.
#[derive(Debug, Clone)]
pub struct ActivityRow {
    pub pid: u32,
    pub user: String,
    pub started_at_us: i64,
    pub current_sql: String,
    pub wait_event: String,
    pub elapsed_us: i64,
    pub in_transaction: bool,
}

/// v6.5.2 — provider callback type. Fresh snapshot returned each
/// call; engine doesn't cache the slice. Stored in
/// `Engine::activity_provider`.
pub type ActivityProvider = fn() -> Vec<ActivityRow>;
716
/// v6.5.3 — one row of `spg_audit_chain`. Engine-public so
/// spg-server can construct rows directly from `AuditEntry`.
///
/// NOTE(review): field semantics come from spg-server's `AuditEntry`,
/// which is not visible here. `ts_ms` is presumably a millisecond
/// timestamp and the `_hex` fields hex-encoded hashes — confirm.
#[derive(Debug, Clone)]
pub struct AuditRow {
    pub seq: i64,
    pub ts_ms: i64,
    pub prev_hash_hex: String,
    pub entry_hash_hex: String,
    pub sql: String,
}

/// v6.5.3 — chain-table provider + verifier. spg-server registers
/// fn pointers that snapshot / verify the audit log. `verify`
/// returns `(verified_count, broken_at_seq)` — `broken_at_seq` is
/// `-1` on a clean chain.
///
/// Stored in `Engine::audit_chain_provider`.
pub type AuditChainProvider = fn() -> Vec<AuditRow>;
/// Verifier half of the pair described above: returns
/// `(verified_count, broken_at_seq)`, with `broken_at_seq == -1` on a
/// clean chain. Stored in `Engine::audit_verifier`.
pub type AuditVerifier = fn() -> (i64, i64);
734
735impl Engine {
    /// Creates an empty engine: a fresh catalog, no open transactions, no
    /// host callbacks (clock, salt, activity, audit, slow-query logger), no
    /// row cap, and empty user / publication / subscription / statistics /
    /// cache state.
    ///
    /// `next_tx_id` starts at 1 because slot 0 is reserved for
    /// `IMPLICIT_TX`.
    pub fn new() -> Self {
        Self {
            catalog: Catalog::new(),
            tx_catalogs: BTreeMap::new(),
            current_tx: None,
            next_tx_id: 1,
            clock: None,
            salt_fn: None,
            max_query_rows: None,
            users: UserStore::new(),
            publications: publications::Publications::new(),
            subscriptions: subscriptions::Subscriptions::new(),
            statistics: statistics::Statistics::new(),
            plan_cache: plan_cache::PlanCache::new(),
            query_stats: query_stats::QueryStats::new(),
            activity_provider: None,
            audit_chain_provider: None,
            audit_verifier: None,
            slow_query_threshold_us: None,
            slow_query_logger: None,
            session_params: BTreeMap::new(),
            trigger_recursion_depth: 0,
        }
    }
760
    /// v7.11.0 — clone the engine's committed catalog + read-time
    /// state into a frozen `CatalogSnapshot`. Cheap (`Catalog` is
    /// backed by `PersistentVec`; cloning is O(log n) per table).
    /// Subsequent writes to this engine are invisible to the
    /// snapshot; the snapshot is self-contained and can be moved
    /// to another thread for concurrent `execute_readonly_on_snapshot`
    /// calls. The basis for [`AsyncReadHandle`] in spg-embedded-tokio
    /// and any other read-fanout pattern.
    ///
    /// NOTE(review): the catalog is taken from `self.active_catalog()`,
    /// not `self.catalog` directly. If `active_catalog` returns a
    /// transaction's shadow catalog while a TX is open, the snapshot can
    /// include uncommitted writes, contrary to "committed" above — confirm
    /// against `active_catalog`'s definition.
    ///
    /// Also copied: a clone of `statistics`, plus the `clock` and
    /// `max_query_rows` settings.
    #[must_use]
    pub fn clone_snapshot(&self) -> CatalogSnapshot {
        CatalogSnapshot {
            catalog: self.active_catalog().clone(),
            statistics: self.statistics.clone(),
            clock: self.clock,
            max_query_rows: self.max_query_rows,
        }
    }
778
779    /// v7.11.1 — execute a read-only SQL statement against a
780    /// `CatalogSnapshot` without touching this engine. Same
781    /// semantics as `execute_readonly` but parameterised on the
782    /// snapshot's catalog. Reject DDL/DML the same way
783    /// `execute_readonly` does. Static-on-Self so the caller can
784    /// dispatch without holding an `Engine` borrow alongside the
785    /// snapshot.
786    pub fn execute_readonly_on_snapshot(
787        snapshot: &CatalogSnapshot,
788        sql: &str,
789    ) -> Result<QueryResult, EngineError> {
790        Self::execute_readonly_on_snapshot_with_cancel(snapshot, sql, CancelToken::none())
791    }
792
793    /// v7.11.1 — `execute_readonly_on_snapshot` with cooperative
794    /// cancellation. Builds a transient `Engine` over the snapshot
795    /// state, runs `execute_readonly_with_cancel`, drops. The
796    /// transient engine is cheap to construct (no I/O; everything
797    /// is just struct moves) and lets the existing read path stay
798    /// untouched.
799    pub fn execute_readonly_on_snapshot_with_cancel(
800        snapshot: &CatalogSnapshot,
801        sql: &str,
802        cancel: CancelToken<'_>,
803    ) -> Result<QueryResult, EngineError> {
804        let transient = Engine {
805            catalog: snapshot.catalog.clone(),
806            statistics: snapshot.statistics.clone(),
807            clock: snapshot.clock,
808            max_query_rows: snapshot.max_query_rows,
809            ..Engine::default()
810        };
811        transient.execute_readonly_with_cancel(sql, cancel)
812    }
813
814    /// Construct an engine restored from a previously-snapshotted catalog
815    /// (see `snapshot()`).
816    pub fn restore(catalog: Catalog) -> Self {
817        Self {
818            catalog,
819            tx_catalogs: BTreeMap::new(),
820            current_tx: None,
821            next_tx_id: 1,
822            clock: None,
823            salt_fn: None,
824            max_query_rows: None,
825            users: UserStore::new(),
826            publications: publications::Publications::new(),
827            subscriptions: subscriptions::Subscriptions::new(),
828            statistics: statistics::Statistics::new(),
829            plan_cache: plan_cache::PlanCache::new(),
830            query_stats: query_stats::QueryStats::new(),
831            activity_provider: None,
832            audit_chain_provider: None,
833            audit_verifier: None,
834            slow_query_threshold_us: None,
835            slow_query_logger: None,
836            session_params: BTreeMap::new(),
837            trigger_recursion_depth: 0,
838        }
839    }
840
841    /// Restore an engine + user table from a v4.1 envelope produced
842    /// by `snapshot_with_users()`. Falls back to plain catalog-only
843    /// restore if the envelope magic isn't present (so v3.x snapshot
844    /// files still load). v6.1.2 adds the optional publications
845    /// trailer (envelope v3); a v1/v2 envelope deserialises to an
846    /// empty publication table.
847    pub fn restore_envelope(buf: &[u8]) -> Result<Self, EngineError> {
848        match split_envelope(buf) {
849            EnvelopeParse::Pair {
850                catalog: catalog_bytes,
851                users: user_bytes,
852                publications: pub_bytes,
853                subscriptions: sub_bytes,
854                statistics: stats_bytes,
855            } => {
856                let catalog = Catalog::deserialize(catalog_bytes).map_err(EngineError::Storage)?;
857                let users = users::deserialize_users(user_bytes)
858                    .map_err(|e| EngineError::Unsupported(alloc::format!("users restore: {e}")))?;
859                let publications = match pub_bytes {
860                    Some(b) => publications::Publications::deserialize(b).map_err(|e| {
861                        EngineError::Unsupported(alloc::format!("publications restore: {e:?}"))
862                    })?,
863                    None => publications::Publications::new(),
864                };
865                let subscriptions = match sub_bytes {
866                    Some(b) => subscriptions::Subscriptions::deserialize(b).map_err(|e| {
867                        EngineError::Unsupported(alloc::format!("subscriptions restore: {e:?}"))
868                    })?,
869                    None => subscriptions::Subscriptions::new(),
870                };
871                let statistics = match stats_bytes {
872                    Some(b) => statistics::Statistics::deserialize(b).map_err(|e| {
873                        EngineError::Unsupported(alloc::format!("statistics restore: {e:?}"))
874                    })?,
875                    None => statistics::Statistics::new(),
876                };
877                Ok(Self {
878                    catalog,
879                    tx_catalogs: BTreeMap::new(),
880                    current_tx: None,
881                    next_tx_id: 1,
882                    clock: None,
883                    salt_fn: None,
884                    max_query_rows: None,
885                    users,
886                    publications,
887                    subscriptions,
888                    statistics,
889                    plan_cache: plan_cache::PlanCache::new(),
890                    query_stats: query_stats::QueryStats::new(),
891                    activity_provider: None,
892                    audit_chain_provider: None,
893                    audit_verifier: None,
894                    slow_query_threshold_us: None,
895                    slow_query_logger: None,
896                    session_params: BTreeMap::new(),
897                    trigger_recursion_depth: 0,
898                })
899            }
900            EnvelopeParse::CrcMismatch { expected, computed } => {
901                Err(EngineError::Storage(StorageError::Corrupt(alloc::format!(
902                    "snapshot envelope CRC32 mismatch (expected={expected:#010x}, computed={computed:#010x})"
903                ))))
904            }
905            EnvelopeParse::Bare => {
906                let catalog = Catalog::deserialize(buf).map_err(EngineError::Storage)?;
907                Ok(Self::restore(catalog))
908            }
909        }
910    }
911
/// Shared reference to the engine's user store.
pub const fn users(&self) -> &UserStore {
    &self.users
}
915
916    /// `salt` is supplied by the caller (the host has a random
917    /// source; the engine is `no_std`). Caller should pass a fresh
918    /// 16-byte random value per user.
919    pub fn create_user(
920        &mut self,
921        name: &str,
922        password: &str,
923        role: Role,
924        salt: [u8; 16],
925    ) -> Result<(), UserError> {
926        self.users.create(name, password, role, salt)?;
927        // v4.8: also derive SCRAM-SHA-256 secrets so PG-wire SASL
928        // auth can verify without re-running PBKDF2 per attempt.
929        // Uses a fresh salt from the host RNG (falls back to a
930        // deterministic per-username salt when no RNG is wired, same
931        // as the legacy hash path).
932        let scram_salt = self.salt_fn.map_or_else(
933            || {
934                let mut s = [0u8; users::SCRAM_SALT_LEN];
935                let digest = spg_crypto::hash(name.as_bytes());
936                // Use bytes 16..32 of BLAKE3 so we don't reuse the
937                // exact same fallback salt as the BLAKE3 hash path.
938                s.copy_from_slice(&digest[16..32]);
939                s
940            },
941            |f| f(),
942        );
943        self.users
944            .enable_scram(name, password, scram_salt, users::SCRAM_DEFAULT_ITERS)?;
945        Ok(())
946    }
947
/// Remove the user called `name` from the user store. Any `UserError`
/// reported by `UserStore::drop` is returned unchanged.
pub fn drop_user(&mut self, name: &str) -> Result<(), UserError> {
    self.users.drop(name)
}
951
/// Check `password` for user `name` by delegating to
/// `UserStore::verify`; the returned `Option<Role>` is passed through
/// unchanged (presumably `Some(role)` on a match — confirm against
/// `UserStore::verify`).
pub fn verify_user(&self, name: &str, password: &str) -> Option<Role> {
    self.users.verify(name, password)
}
955
/// Builder: attach a wall clock so `NOW()` / `CURRENT_TIMESTAMP` /
/// `CURRENT_DATE` evaluate to a real value instead of erroring out.
/// Consumes the engine and returns it with `clock` set; any previously
/// attached clock is replaced.
#[must_use]
pub const fn with_clock(mut self, clock: ClockFn) -> Self {
    self.clock = Some(clock);
    self
}
963
/// Builder: attach an OS-backed RNG for per-user password salts.
/// The host (`spg-server`) typically wires this to `/dev/urandom`.
/// `create_user` calls the hook to obtain the SCRAM salt; without it a
/// deterministic per-username salt is used instead.
#[must_use]
pub const fn with_salt_fn(mut self, f: SaltFn) -> Self {
    self.salt_fn = Some(f);
    self
}
971
/// Builder: cap the number of rows a single query result may contain.
/// Exceeding the cap raises `EngineError::RowLimitExceeded`.
///
/// NOTE(review): on the read path the cap is applied by
/// `enforce_row_limit` after the executor has produced its rows;
/// whether the executor also checks the bound earlier (so a runaway
/// scan cannot allocate millions of rows first) is not visible from
/// this part of the file — confirm.
#[must_use]
pub const fn with_max_query_rows(mut self, n: usize) -> Self {
    self.max_query_rows = Some(n);
    self
}
982
/// The *committed* catalog. During a transaction this is still the
/// pre-TX state: a `SELECT` inside a TX goes through `execute()` and
/// reads the TX's shadow catalog instead. Tests that want to inspect
/// outside-TX state should use this accessor.
pub const fn catalog(&self) -> &Catalog {
    &self.catalog
}
989
990    /// Serialize the *committed* catalog to bytes. v0.6 was full-snapshot; v0.9
991    /// adds the rule that an open TX's shadow is never snapshotted — only the
992    /// post-COMMIT state is persisted. v4.1 wraps the catalog in an envelope
993    /// when there are users to persist; an empty user table snapshots as the
994    /// bare catalog format (backwards-compat with v3.x readers). v6.1.2
995    /// adds publications to the envelope condition: either non-empty
996    /// users OR non-empty publications now triggers the envelope path.
997    pub fn snapshot(&self) -> Vec<u8> {
998        if self.users.is_empty()
999            && self.publications.is_empty()
1000            && self.subscriptions.is_empty()
1001            && self.statistics.is_empty()
1002        {
1003            self.catalog.serialize()
1004        } else {
1005            build_envelope(
1006                &self.catalog.serialize(),
1007                &users::serialize_users(&self.users),
1008                &self.publications.serialize(),
1009                &self.subscriptions.serialize(),
1010                &self.statistics.serialize(),
1011            )
1012        }
1013    }
1014
/// True when at least one TX slot exists in `tx_catalogs`, i.e. some
/// transaction is in flight. v4.41.1 runtime invariant: at most one
/// slot is active at a time (dispatch holds `engine.write()` across
/// the entire wrap). v4.42 will let this return true with multiple
/// slots concurrently.
pub fn in_transaction(&self) -> bool {
    !self.tx_catalogs.is_empty()
}
1022
1023    /// v4.41.1 allocate a fresh TX handle. Used by spg-server dispatch
1024    /// to scope each implicit-wrap BEGIN..stmt..COMMIT to its own slot
1025    /// in `tx_catalogs`. v4.42 — the commit-barrier leader allocates
1026    /// one of these per task in its group, runs `BEGIN`+sql+`COMMIT`
1027    /// sequentially under a single `engine.write()` so each task's
1028    /// mutations accumulate into shared state, then either keeps the
1029    /// accumulated state (fsync OK) or restores the pre-image via
1030    /// `replace_catalog` (fsync err).
1031    pub fn alloc_tx_id(&mut self) -> TxId {
1032        let id = TxId(self.next_tx_id);
1033        self.next_tx_id = self.next_tx_id.saturating_add(1);
1034        id
1035    }
1036
/// v4.42 — replace the live (committed) catalog wholesale.
///
/// Used by the commit-barrier leader to roll back a group whose
/// batched fsync failed. At group start the leader snapshots
/// `engine.catalog().clone()` (an O(1) Arc bump after the v4.39/v4.40
/// persistent migration). It then applies each task's BEGIN + sql +
/// COMMIT sequentially under the same write lock, batches the WAL
/// bytes and fsyncs once. On failure it calls this with the pre-image
/// to undo every task in the group at once.
///
/// **Does NOT touch `tx_catalogs` / `current_tx`.** Any explicit-TX
/// slot from a concurrent client (created via the legacy
/// `IMPLICIT_TX`-less dispatch path or via the future MVCC-readers
/// v5+ work) has its own snapshot baked into the slot. Restoring
/// `self.catalog` to the pre-image therefore leaves those slots
/// exactly as they were when the leader took the lock. The leader's
/// own implicit-TX slots are already discarded by then, because
/// `exec_commit` removed them as each task's COMMIT ran.
pub fn replace_catalog(&mut self, catalog: Catalog) {
    self.catalog = catalog;
}
1059
1060    /// v6.7.0 — public shim around `Catalog::freeze_oldest_to_cold`
1061    /// so tests + the spg-server freezer can drive a freeze without
1062    /// reaching into the private `active_catalog_mut`. v6.7.4
1063    /// parallel freezer will build on this surface.
1064    ///
1065    /// Marks the table's cached `cold_row_count` stale because the
1066    /// freeze added cold locators that ANALYZE hasn't yet refreshed.
1067    pub fn freeze_oldest_to_cold(
1068        &mut self,
1069        table_name: &str,
1070        index_name: &str,
1071        max_rows: usize,
1072    ) -> Result<spg_storage::FreezeReport, EngineError> {
1073        let report = self
1074            .active_catalog_mut()
1075            .freeze_oldest_to_cold(table_name, index_name, max_rows)
1076            .map_err(EngineError::Storage)?;
1077        if let Some(t) = self.active_catalog_mut().get_mut(table_name) {
1078            t.mark_cold_row_count_stale();
1079        }
1080        Ok(report)
1081    }
1082
1083    /// v6.7.5 — public shim used by the spg-server follower's
1084    /// segment-forwarding receiver. Registers a cold-tier segment
1085    /// at a specific id (the master's id, as transmitted on the
1086    /// wire) so the follower's BTree-Cold locators stay byte-
1087    /// identical with the master's. Wraps
1088    /// `Catalog::load_segment_bytes_at` under the standard
1089    /// clone-mutate-replace pattern.
1090    ///
1091    /// Returns `Ok(())` on success **and** on the "slot already
1092    /// occupied" case — a follower mid-reconnect may receive a
1093    /// segment chunk for a segment_id it already has on disk
1094    /// (forwarded last session); the caller should treat that
1095    /// path as a no-op rather than a fatal error.
1096    pub fn receive_cold_segment(
1097        &mut self,
1098        segment_id: u32,
1099        bytes: Vec<u8>,
1100    ) -> Result<(), EngineError> {
1101        let mut new_cat = self.catalog.clone();
1102        match new_cat.load_segment_bytes_at(segment_id, bytes) {
1103            Ok(()) => {
1104                self.replace_catalog(new_cat);
1105                Ok(())
1106            }
1107            Err(StorageError::Corrupt(msg)) if msg.contains("already occupied") => Ok(()),
1108            Err(e) => Err(EngineError::Storage(e)),
1109        }
1110    }
1111
1112    /// v6.7.3 — public shim around `Catalog::compact_cold_segments`
1113    /// driving every BTree index on every user table. Returns one
1114    /// `(table, index, report)` triple for each merge that
1115    /// actually happened (no-op (table, index) pairs are filtered
1116    /// out so callers can size persist-side work to the live
1117    /// merges). Caller is responsible for persisting each
1118    /// `report.merged_segment_bytes` and updating the on-disk
1119    /// segment registry; engine layer is no_std and never
1120    /// touches disk.
1121    ///
1122    /// Marks every touched table's cached `cold_row_count` stale
1123    /// — compaction GC'd some shadowed rows, so the count must be
1124    /// re-derived on the next ANALYZE.
1125    pub fn compact_cold_segments_with_target(
1126        &mut self,
1127        target_segment_bytes: u64,
1128    ) -> Result<Vec<(String, String, CompactReport)>, EngineError> {
1129        let table_names = self.active_catalog().table_names();
1130        let mut reports: Vec<(String, String, CompactReport)> = Vec::new();
1131        for tname in table_names {
1132            if is_internal_table_name(&tname) {
1133                continue;
1134            }
1135            let idx_names: Vec<String> = {
1136                let Some(t) = self.active_catalog().get(&tname) else {
1137                    continue;
1138                };
1139                t.indices()
1140                    .iter()
1141                    .filter(|i| matches!(i.kind, IndexKind::BTree(_)))
1142                    .map(|i| i.name.clone())
1143                    .collect()
1144            };
1145            for iname in idx_names {
1146                let report = self
1147                    .active_catalog_mut()
1148                    .compact_cold_segments(&tname, &iname, target_segment_bytes)
1149                    .map_err(EngineError::Storage)?;
1150                if report.merged_segment_id.is_some() {
1151                    if let Some(t) = self.active_catalog_mut().get_mut(&tname) {
1152                        t.mark_cold_row_count_stale();
1153                    }
1154                    reports.push((tname.clone(), iname, report));
1155                }
1156            }
1157        }
1158        Ok(reports)
1159    }
1160
1161    fn active_catalog(&self) -> &Catalog {
1162        match self.current_tx {
1163            Some(t) => self
1164                .tx_catalogs
1165                .get(&t)
1166                .map_or(&self.catalog, |s| &s.catalog),
1167            None => &self.catalog,
1168        }
1169    }
1170
1171    /// v7.12.4 — snapshot every row-level trigger on `table` that
1172    /// fires for `event` (`"INSERT"` / `"UPDATE"` / `"DELETE"`) at
1173    /// the given `timing` (`"BEFORE"` / `"AFTER"`), and clone its
1174    /// referenced function definition. Returned as a vec of owned
1175    /// `FunctionDef` so the row-write loop can fire them without
1176    /// holding a borrow on the catalog (which would conflict with
1177    /// the table.insert / update_row / delete mutable borrows).
1178    fn snapshot_row_triggers(
1179        &self,
1180        table: &str,
1181        event: &str,
1182        timing: &str,
1183    ) -> Vec<spg_storage::FunctionDef> {
1184        let cat = self.active_catalog();
1185        cat.triggers()
1186            .iter()
1187            .filter(|t| {
1188                t.table == table
1189                    && t.timing.eq_ignore_ascii_case(timing)
1190                    && t.for_each.eq_ignore_ascii_case("row")
1191                    && t.events.iter().any(|e| e.eq_ignore_ascii_case(event))
1192            })
1193            .filter_map(|t| cat.functions().get(&t.function).cloned())
1194            .collect()
1195    }
1196
1197    /// v7.13.0 — UPDATE-side snapshot that pairs each trigger's
1198    /// function with its `UPDATE OF cols` filter (mailrs round-5
1199    /// G7). Empty filter Vec means "fire unconditionally", matching
1200    /// the v7.12 behaviour.
1201    fn snapshot_update_row_triggers(
1202        &self,
1203        table: &str,
1204        timing: &str,
1205    ) -> Vec<(spg_storage::FunctionDef, Vec<String>)> {
1206        let cat = self.active_catalog();
1207        cat.triggers()
1208            .iter()
1209            .filter(|t| {
1210                t.table == table
1211                    && t.timing.eq_ignore_ascii_case(timing)
1212                    && t.for_each.eq_ignore_ascii_case("row")
1213                    && t.events.iter().any(|e| e.eq_ignore_ascii_case("UPDATE"))
1214            })
1215            .filter_map(|t| {
1216                cat.functions()
1217                    .get(&t.function)
1218                    .cloned()
1219                    .map(|fd| (fd, t.update_columns.clone()))
1220            })
1221            .collect()
1222    }
1223
1224    /// v7.12.7 — drain the trigger-emitted embedded SQL queue.
1225    /// Called by the INSERT / UPDATE / DELETE executors after
1226    /// their main row-write loop returns. Each statement runs
1227    /// inside the same cancel scope as the firing DML and bumps
1228    /// the recursion counter; nested embedded SQL beyond
1229    /// [`MAX_TRIGGER_RECURSION`] errors with a clear message so
1230    /// a trigger-graph cycle surfaces as a query failure instead
1231    /// of stack-blowing the engine.
1232    fn execute_deferred_trigger_stmts(
1233        &mut self,
1234        deferred: Vec<triggers::DeferredEmbeddedStmt>,
1235        cancel: CancelToken<'_>,
1236    ) -> Result<(), EngineError> {
1237        for d in deferred {
1238            if self.trigger_recursion_depth >= MAX_TRIGGER_RECURSION {
1239                return Err(EngineError::Storage(StorageError::Corrupt(alloc::format!(
1240                    "trigger embedded SQL recursion depth {} exceeded (trigger function \
1241                     {:?} would push past the {} cap — check for trigger cycles)",
1242                    self.trigger_recursion_depth,
1243                    d.function,
1244                    MAX_TRIGGER_RECURSION,
1245                ))));
1246            }
1247            self.trigger_recursion_depth += 1;
1248            let res = self.execute_stmt_with_cancel(d.stmt, cancel);
1249            self.trigger_recursion_depth -= 1;
1250            res?;
1251        }
1252        Ok(())
1253    }
1254
/// Mutable counterpart of `active_catalog`: the shadow catalog of the
/// TX slot named by `current_tx` when that slot exists in
/// `tx_catalogs`, otherwise the committed catalog.
fn active_catalog_mut(&mut self) -> &mut Catalog {
    // Copy the id out first so `self` is free to be borrowed mutably
    // in the arms below.
    let tx = self.current_tx;
    match tx {
        Some(t) => match self.tx_catalogs.get_mut(&t) {
            Some(s) => &mut s.catalog,
            // `current_tx` is set but no slot is registered for it:
            // fall back to the committed catalog.
            None => &mut self.catalog,
        },
        None => &mut self.catalog,
    }
}
1265
1266    /// Read-only execute path. Succeeds for `SELECT` / `SHOW TABLES`
1267    /// / `SHOW COLUMNS`; returns `EngineError::WriteRequired` for
1268    /// every other statement, so the caller can fall through to the
1269    /// `&mut self` `execute` path under a write lock. Engine state is
1270    /// not mutated even on the success path (`rewrite_clock_calls`
1271    /// and `resolve_order_by_position` both mutate the locally-owned
1272    /// AST, not `self`).
1273    ///
1274    /// **v4.0 concurrency**: this is the entry point the server takes
1275    /// under an `RwLock::read()` so multiple `SELECT` clients run in
1276    /// parallel without serialising on a single mutex.
1277    pub fn execute_readonly(&self, sql: &str) -> Result<QueryResult, EngineError> {
1278        self.execute_readonly_with_cancel(sql, CancelToken::none())
1279    }
1280
1281    /// v4.5 — read path with cooperative cancellation. Token's
1282    /// `is_cancelled` is checked at the start (so a watchdog that
1283    /// already fired returns Cancelled immediately) and at row-loop
1284    /// checkpoints inside `exec_select`. SHOW paths are O(small) and
1285    /// don't bother checking.
1286    pub fn execute_readonly_with_cancel(
1287        &self,
1288        sql: &str,
1289        cancel: CancelToken<'_>,
1290    ) -> Result<QueryResult, EngineError> {
1291        cancel.check()?;
1292        let mut stmt = parser::parse_statement(sql)?;
1293        let now_micros = self.clock.map(|f| f());
1294        rewrite_clock_calls(&mut stmt, now_micros);
1295        if let Statement::Select(s) = &mut stmt {
1296            resolve_order_by_position(s);
1297            // v6.2.3 — cost-based JOIN reorder (read path).
1298            reorder::reorder_joins(s, &self.catalog, &self.statistics);
1299        }
1300        let result = match stmt {
1301            Statement::Select(s) => self.exec_select_cancel(&s, cancel),
1302            Statement::ShowTables => Ok(self.exec_show_tables()),
1303            Statement::ShowColumns(table) => self.exec_show_columns(&table),
1304            Statement::ShowUsers => Ok(self.exec_show_users()),
1305            Statement::ShowPublications => Ok(self.exec_show_publications()),
1306            Statement::ShowSubscriptions => Ok(self.exec_show_subscriptions()),
1307            Statement::WaitForWalPosition { .. } => Err(EngineError::Unsupported(
1308                "WAIT FOR WAL POSITION must be handled by the server layer".into(),
1309            )),
1310            Statement::Explain(e) => self.exec_explain(&e, cancel),
1311            _ => Err(EngineError::WriteRequired),
1312        };
1313        self.enforce_row_limit(result)
1314    }
1315
1316    /// v4.2: cap result-set size. Applied after the executor
1317    /// materialises rows but before they leave the engine — wrapping
1318    /// every Rows-returning exec_* function would scatter the check.
1319    fn enforce_row_limit(
1320        &self,
1321        result: Result<QueryResult, EngineError>,
1322    ) -> Result<QueryResult, EngineError> {
1323        if let (Ok(QueryResult::Rows { rows, .. }), Some(cap)) = (&result, self.max_query_rows)
1324            && rows.len() > cap
1325        {
1326            return Err(EngineError::RowLimitExceeded(cap));
1327        }
1328        result
1329    }
1330
1331    pub fn execute(&mut self, sql: &str) -> Result<QueryResult, EngineError> {
1332        self.execute_in_with_cancel(sql, IMPLICIT_TX, CancelToken::none())
1333    }
1334
/// v4.5 — write path with cooperative cancellation on the implicit
/// transaction slot. Dispatches exactly like
/// `execute_in_with_cancel(sql, IMPLICIT_TX, cancel)`; kept as a
/// separate entry point for backward compatibility with the v4.5
/// public API.
pub fn execute_with_cancel(
    &mut self,
    sql: &str,
    cancel: CancelToken<'_>,
) -> Result<QueryResult, EngineError> {
    self.execute_in_with_cancel(sql, IMPLICIT_TX, cancel)
}
1346
/// v4.41.1 — multi-slot write entry without cancellation.
///
/// Routes `sql` through the TX slot identified by `tx_id`, so
/// spg-server dispatch can scope each implicit-wrap BEGIN..stmt..COMMIT
/// to its own slot in `tx_catalogs`. `IMPLICIT_TX` is the legacy
/// single-slot path that every other caller (engine self-tests,
/// replay, spg-embedded) takes implicitly via `execute()` /
/// `execute_with_cancel()`.
pub fn execute_in(&mut self, sql: &str, tx_id: TxId) -> Result<QueryResult, EngineError> {
    self.execute_in_with_cancel(sql, tx_id, CancelToken::none())
}
1356
1357    /// v4.41.1 write path with cooperative cancellation + explicit TX
1358    /// scope. Sets `self.current_tx` for the duration of the call so
1359    /// every `exec_*` helper transparently sees its TX's shadow
1360    /// catalog and savepoint stack; restores on exit so the field is
1361    /// only valid mid-call (no leakage across calls).
1362    pub fn execute_in_with_cancel(
1363        &mut self,
1364        sql: &str,
1365        tx_id: TxId,
1366        cancel: CancelToken<'_>,
1367    ) -> Result<QueryResult, EngineError> {
1368        let saved = self.current_tx;
1369        self.current_tx = Some(tx_id);
1370        let result = self.execute_inner_with_cancel(sql, cancel);
1371        self.current_tx = saved;
1372        result
1373    }
1374
1375    /// v6.1.1 — parse and pre-process a SQL string ONCE so the
1376    /// resulting [`Statement`] can be cached and re-executed via
1377    /// [`Engine::execute_prepared`]. Returns the same `Statement`
1378    /// the simple-query path would synthesise internally (clock
1379    /// rewrites + ORDER BY position-ref resolution applied at
1380    /// prepare time, since both are session-independent). The
1381    /// `$N` placeholders in the SQL stay as `Expr::Placeholder(n)`
1382    /// nodes; they're resolved to concrete values per-call by
1383    /// `execute_prepared`'s substitution walk.
1384    ///
1385    /// Pgwire's `Parse` (P) message lands here.
1386    pub fn prepare(&self, sql: &str) -> Result<Statement, ParseError> {
1387        let mut stmt = parser::parse_statement(sql)?;
1388        let now_micros = self.clock.map(|f| f());
1389        rewrite_clock_calls(&mut stmt, now_micros);
1390        if let Statement::Select(s) = &mut stmt {
1391            // v6.4.1 — expand `GROUP BY ALL` to every non-aggregate
1392            // SELECT-list item BEFORE position / alias resolution so
1393            // downstream passes see the explicit list.
1394            expand_group_by_all(s);
1395            resolve_order_by_position(s);
1396            // v6.2.3 — cost-based JOIN reorder. No-op for
1397            // single-table FROMs or any non-INNER join shape.
1398            reorder::reorder_joins(s, &self.catalog, &self.statistics);
1399        }
1400        Ok(stmt)
1401    }
1402
1403    /// v6.3.0 — cached prepare. Returns a cloned `Statement` from
1404    /// the plan cache on hit, runs the full `prepare()` path on miss
1405    /// and inserts the resulting plan before returning. Skipping the
1406    /// parse + JOIN-reorder pipeline on hit is the dominant win for
1407    /// JDBC / sqlx / pgx clients that reuse the same SQL string.
1408    ///
1409    /// Returns a cloned `Statement` (not a borrow) because the
1410    /// pgwire layer owns its `PreparedStmt` map per-session and the
1411    /// engine-level cache must stay available for other sessions.
1412    /// Clone cost on a 5-table JOIN AST is well under the parse cost
1413    /// it replaces.
1414    pub fn prepare_cached(&mut self, sql: &str) -> Result<Statement, ParseError> {
1415        // v6.3.1 — version-aware lookup. If the cached plan was
1416        // prepared before the most recent ANALYZE, evict and replan.
1417        let current_version = self.statistics.version();
1418        if let Some(plan) = self.plan_cache.get(sql) {
1419            if plan.statistics_version == current_version {
1420                return Ok(plan.stmt.clone());
1421            }
1422            // Stale entry — fall through to evict + re-prepare.
1423        }
1424        self.plan_cache.evict(sql);
1425        let stmt = self.prepare(sql)?;
1426        let source_tables = plan_cache::collect_source_tables(&stmt);
1427        let plan = plan_cache::PreparedPlan {
1428            stmt: stmt.clone(),
1429            statistics_version: current_version,
1430            source_tables,
1431            describe_columns: alloc::vec::Vec::new(),
1432        };
1433        self.plan_cache.insert(String::from(sql), plan);
1434        Ok(stmt)
1435    }
1436
/// v6.3.0 — shared reference to the engine-level plan cache, for tests
/// and the v6.3.1 invalidation logic.
pub fn plan_cache(&self) -> &plan_cache::PlanCache {
    &self.plan_cache
}
1441
/// v6.3.0 — mutable reference to the engine-level plan cache, used by
/// the v6.3.1 invalidation hooks.
pub fn plan_cache_mut(&mut self) -> &mut plan_cache::PlanCache {
    &mut self.plan_cache
}
1446
1447    /// v6.3.3 — Describe a prepared `Statement` without executing.
1448    /// Returns `(parameter_oids, output_columns)`. Empty
1449    /// `output_columns` means the statement has no row-producing
1450    /// shape we could resolve here (JOIN, subquery, non-SELECT, …)
1451    /// — pgwire layer maps that to a `NoData` reply.
1452    pub fn describe_prepared(&self, stmt: &Statement) -> (Vec<u32>, Vec<ColumnSchema>) {
1453        describe::describe_prepared(stmt, self.active_catalog())
1454    }
1455
    /// v6.1.1 — execute a [`Statement`] previously returned by
    /// [`Engine::prepare`], substituting `Expr::Placeholder(n)`
    /// nodes for the corresponding [`Value`] in `params` (1-based
    /// per PG: `$1` → `params[0]`). Bind-time string parameters
    /// are decoded into typed `Value`s by the pgwire layer before
    /// this call so the resulting AST hits the same execution
    /// path as a simple query — no SQL re-parse.
    ///
    /// Pgwire's `Execute` (E) message after a `Bind` (B) lands here.
    ///
    /// The statement is taken by value and rewritten in place, so the
    /// caller's cached copy (if any) is not modified. Execution runs
    /// with `CancelToken::none()`.
    ///
    /// # Errors
    /// Propagates any error from `substitute_placeholders` and any
    /// error from executing the substituted statement.
    pub fn execute_prepared(
        &mut self,
        mut stmt: Statement,
        params: &[Value],
    ) -> Result<QueryResult, EngineError> {
        // Bind parameters into the owned AST first; bail out before
        // touching engine state if substitution fails.
        substitute_placeholders(&mut stmt, params)?;
        self.execute_stmt_with_cancel(stmt, CancelToken::none())
    }
1473
1474    fn execute_inner_with_cancel(
1475        &mut self,
1476        sql: &str,
1477        cancel: CancelToken<'_>,
1478    ) -> Result<QueryResult, EngineError> {
1479        cancel.check()?;
1480        let stmt = self.prepare(sql)?;
1481        // v6.5.1 — wrap the executor with a wall-clock window so we
1482        // can record into spg_stat_query. Skip when the engine has
1483        // no clock attached (no_std embedded callers).
1484        let start_us = self.clock.map(|f| f());
1485        let result = self.execute_stmt_with_cancel(stmt, cancel);
1486        if let (Some(t0), Ok(_)) = (start_us, &result) {
1487            let now = self.clock.map_or(t0, |f| f());
1488            let elapsed = now.saturating_sub(t0).max(0) as u64;
1489            self.query_stats.record(sql, elapsed, now as u64);
1490            // v6.5.6 — slow-query log: fire callback when elapsed
1491            // exceeds the configured floor.
1492            if let (Some(threshold), Some(logger)) =
1493                (self.slow_query_threshold_us, self.slow_query_logger)
1494                && elapsed >= threshold
1495            {
1496                logger(sql, elapsed);
1497            }
1498        }
1499        result
1500    }
1501
1502    fn execute_stmt_with_cancel(
1503        &mut self,
1504        stmt: Statement,
1505        cancel: CancelToken<'_>,
1506    ) -> Result<QueryResult, EngineError> {
1507        cancel.check()?;
1508        let result = match stmt {
1509            Statement::CreateTable(s) => self.exec_create_table(s),
1510            // v7.9.15 — CREATE EXTENSION is a no-op on SPG. Returns
1511            // CommandOk with affected=0; modified_catalog=false so
1512            // the WAL doesn't grow a useless entry. mailrs F3.
1513            Statement::CreateExtension(_) => Ok(QueryResult::CommandOk {
1514                affected: 0,
1515                modified_catalog: false,
1516            }),
1517            // v7.9.27 — DO $$ ... $$ is also a no-op (SPG has no
1518            // PL/pgSQL). mailrs H1 + pg_dump compat.
1519            Statement::DoBlock => Ok(QueryResult::CommandOk {
1520                affected: 0,
1521                modified_catalog: false,
1522            }),
1523            Statement::CreateIndex(s) => self.exec_create_index(s),
1524            Statement::Insert(s) => self.exec_insert(s),
1525            Statement::Update(s) => self.exec_update_cancel(&s, cancel),
1526            Statement::Delete(s) => self.exec_delete_cancel(&s, cancel),
1527            Statement::Select(s) => self.exec_select_cancel(&s, cancel),
1528            Statement::Begin => self.exec_begin(),
1529            Statement::Commit => self.exec_commit(),
1530            Statement::Rollback => self.exec_rollback(),
1531            Statement::Savepoint(name) => self.exec_savepoint(name),
1532            Statement::RollbackToSavepoint(name) => self.exec_rollback_to_savepoint(&name),
1533            Statement::ReleaseSavepoint(name) => self.exec_release_savepoint(&name),
1534            Statement::ShowTables => Ok(self.exec_show_tables()),
1535            Statement::ShowColumns(table) => self.exec_show_columns(&table),
1536            Statement::ShowUsers => Ok(self.exec_show_users()),
1537            Statement::ShowPublications => Ok(self.exec_show_publications()),
1538            Statement::ShowSubscriptions => Ok(self.exec_show_subscriptions()),
1539            Statement::CreateUser(s) => self.exec_create_user(&s),
1540            Statement::DropUser(name) => self.exec_drop_user(&name),
1541            Statement::Explain(e) => self.exec_explain(&e, cancel),
1542            Statement::AlterIndex(s) => self.exec_alter_index(s),
1543            Statement::AlterTable(s) => self.exec_alter_table(s),
1544            Statement::CreatePublication(s) => self.exec_create_publication(s),
1545            Statement::DropPublication(name) => self.exec_drop_publication(&name),
1546            Statement::CreateSubscription(s) => self.exec_create_subscription(s),
1547            Statement::DropSubscription(name) => self.exec_drop_subscription(&name),
1548            // v6.1.7 — WAIT FOR WAL POSITION needs `lag_state`,
1549            // which lives in spg-server's ServerState. The engine
1550            // surfaces a clear error; the server-layer dispatch
1551            // intercepts the SQL before it reaches the engine on
1552            // a server build, so this arm only fires for
1553            // engine-only callers (spg-embedded, lib tests).
1554            Statement::WaitForWalPosition { .. } => Err(EngineError::Unsupported(
1555                "WAIT FOR WAL POSITION must be handled by the server layer".into(),
1556            )),
1557            // v6.2.0 — ANALYZE recomputes per-column histograms.
1558            Statement::Analyze(target) => self.exec_analyze(target.as_deref()),
1559            // v6.7.3 — COMPACT COLD SEGMENTS.
1560            Statement::CompactColdSegments => self.exec_compact_cold_segments(),
1561            // v7.12.1 — SET / RESET session parameter. Engine
1562            // tracks the value in `session_params`; FTS dispatcher
1563            // reads `default_text_search_config`. Everything else
1564            // is a recorded no-op (PG dump compat).
1565            Statement::SetParameter { name, value } => {
1566                self.set_session_param(name, value);
1567                Ok(QueryResult::CommandOk {
1568                    affected: 0,
1569                    modified_catalog: false,
1570                })
1571            }
1572            // v7.12.4 — CREATE FUNCTION / CREATE TRIGGER / DROP …
1573            // for the PL/pgSQL trigger surface. exec_* methods are
1574            // defined alongside the existing CREATE handlers below.
1575            Statement::CreateFunction(s) => self.exec_create_function(s),
1576            Statement::CreateTrigger(s) => self.exec_create_trigger(s),
1577            Statement::DropTrigger {
1578                name,
1579                table,
1580                if_exists,
1581            } => self.exec_drop_trigger(&name, &table, if_exists),
1582            Statement::DropFunction { name, if_exists } => {
1583                self.exec_drop_function(&name, if_exists)
1584            }
1585            Statement::ResetParameter(target) => {
1586                match target {
1587                    None => self.session_params.clear(),
1588                    Some(name) => {
1589                        self.session_params.remove(&name.to_ascii_lowercase());
1590                    }
1591                }
1592                Ok(QueryResult::CommandOk {
1593                    affected: 0,
1594                    modified_catalog: false,
1595                })
1596            }
1597        };
1598        self.enforce_row_limit(result)
1599    }
1600
1601    /// v6.1.2 — `CREATE PUBLICATION` runtime path. Duplicate names
1602    /// surface as `EngineError::Unsupported` so the existing PG-wire
1603    /// error mapping stays uniform; the message carries the name so
1604    /// operators can grep replication-log noise. Inside-transaction
1605    /// invocation is rejected (matches `CREATE USER` / `DROP USER`
1606    /// stance) — replication-catalog mutation is a connection-level
1607    /// administrative op, not a transactional one.
1608    fn exec_create_publication(
1609        &mut self,
1610        s: CreatePublicationStatement,
1611    ) -> Result<QueryResult, EngineError> {
1612        // v6.1.4 — the v6.1.2 "no DDL inside a transaction" guard
1613        // was over-cautious: it also blocked the auto-commit wrap
1614        // path (which begins an internal TX around every WAL-
1615        // logged statement). PG itself allows CREATE PUBLICATION
1616        // inside a transaction (it rolls back with the TX).
1617        self.publications
1618            .create(s.name, s.scope)
1619            .map_err(|e| EngineError::Unsupported(alloc::format!("CREATE PUBLICATION: {e:?}")))?;
1620        Ok(QueryResult::CommandOk {
1621            affected: 1,
1622            modified_catalog: true,
1623        })
1624    }
1625
1626    /// v6.1.2 — `DROP PUBLICATION` runtime path. PG-compatible silent
1627    /// no-op when the publication doesn't exist (returns `affected=0`
1628    /// in that case so the wire-level command tag distinguishes
1629    /// "dropped" from "no-op", though both succeed).
1630    fn exec_drop_publication(&mut self, name: &str) -> Result<QueryResult, EngineError> {
1631        let removed = self.publications.drop(name);
1632        Ok(QueryResult::CommandOk {
1633            affected: usize::from(removed),
1634            modified_catalog: removed,
1635        })
1636    }
1637
    /// v6.1.2 — read access to the publication catalog.
    ///
    /// Returns a shared borrow of the `publications` field. Used by
    /// the v6.1.5 publisher-side WAL filter, by `SHOW PUBLICATIONS`
    /// (v6.1.3+), and by e2e tests that need to assert state without
    /// going through the wire.
    pub const fn publications(&self) -> &publications::Publications {
        &self.publications
    }
1645
1646    /// v6.1.4 — `CREATE SUBSCRIPTION` runtime path. Defaults
1647    /// `enabled = true` and `last_received_pos = 0` for a freshly-
1648    /// created subscription. The actual worker thread is spawned
1649    /// by spg-server once the engine returns success.
1650    fn exec_create_subscription(
1651        &mut self,
1652        s: CreateSubscriptionStatement,
1653    ) -> Result<QueryResult, EngineError> {
1654        // See exec_create_publication — the in_transaction gate
1655        // was over-cautious; the auto-commit wrap path holds an
1656        // internal TX that this check was incorrectly blocking.
1657        let sub = subscriptions::Subscription {
1658            conn_str: s.conn_str,
1659            publications: s.publications,
1660            enabled: true,
1661            last_received_pos: 0,
1662        };
1663        self.subscriptions
1664            .create(s.name, sub)
1665            .map_err(|e| EngineError::Unsupported(alloc::format!("CREATE SUBSCRIPTION: {e:?}")))?;
1666        Ok(QueryResult::CommandOk {
1667            affected: 1,
1668            modified_catalog: true,
1669        })
1670    }
1671
1672    /// v6.1.4 — `DROP SUBSCRIPTION`. Silent no-op when the name
1673    /// doesn't exist (PG-compatible). The associated worker is
1674    /// torn down by spg-server when it observes the catalog
1675    /// change at the next snapshot or via the engine's
1676    /// subscriptions accessor (the worker polls the catalog on
1677    /// reconnect; v6.1.5's filter-side will tighten this to an
1678    /// explicit signal).
1679    fn exec_drop_subscription(&mut self, name: &str) -> Result<QueryResult, EngineError> {
1680        let removed = self.subscriptions.drop(name);
1681        Ok(QueryResult::CommandOk {
1682            affected: usize::from(removed),
1683            modified_catalog: removed,
1684        })
1685    }
1686
    /// v6.1.4 — read access to the subscription catalog.
    ///
    /// Returns a shared borrow of the `subscriptions` field. Used by
    /// the subscription worker (to read its own row: publications +
    /// last applied position), by SHOW SUBSCRIPTIONS, and by e2e
    /// tests asserting state directly.
    pub const fn subscriptions(&self) -> &subscriptions::Subscriptions {
        &self.subscriptions
    }
1694
    /// v6.1.4 — write access to a subscription's `last_received_pos`.
    ///
    /// Delegates to `update_last_received_pos(name, pos)` on the
    /// subscription catalog and returns its result. The worker calls
    /// this after each apply batch (under the engine's write-lock).
    ///
    /// Returns `false` when the subscription was dropped between when
    /// the worker received the record and when this call landed.
    pub fn subscription_advance(&mut self, name: &str, pos: u64) -> bool {
        self.subscriptions.update_last_received_pos(name, pos)
    }
1703
1704    /// v6.1.4 — `SHOW SUBSCRIPTIONS` row materialisation. Returns
1705    /// `(name, conn_str, publications, enabled, last_received_pos)`
1706    /// ordered by subscription name. The `publications` column is
1707    /// the comma-joined list ("p1, p2") for ergonomic SHOW output;
1708    /// callers wanting structured access read `Engine::subscriptions`.
1709    fn exec_show_subscriptions(&self) -> QueryResult {
1710        let columns = alloc::vec![
1711            ColumnSchema::new("name", DataType::Text, false),
1712            ColumnSchema::new("conn_str", DataType::Text, false),
1713            ColumnSchema::new("publications", DataType::Text, false),
1714            ColumnSchema::new("enabled", DataType::Bool, false),
1715            ColumnSchema::new("last_received_pos", DataType::BigInt, false),
1716        ];
1717        let rows: Vec<Row> = self
1718            .subscriptions
1719            .iter()
1720            .map(|(name, sub)| {
1721                Row::new(alloc::vec![
1722                    Value::Text(name.clone()),
1723                    Value::Text(sub.conn_str.clone()),
1724                    Value::Text(sub.publications.join(", ")),
1725                    Value::Bool(sub.enabled),
1726                    Value::BigInt(i64::try_from(sub.last_received_pos).unwrap_or(i64::MAX)),
1727                ])
1728            })
1729            .collect();
1730        QueryResult::Rows { columns, rows }
1731    }
1732
1733    /// v6.2.0 — materialise `spg_statistic` rows. One row per
1734    /// `(table, column)` pair tracked in `Statistics`, with
1735    /// `histogram_bounds` rendered as a `[v0, v1, ...]` string —
1736    /// the same canonical form vector literals use for round-trip.
1737    fn exec_spg_statistic(&self) -> QueryResult {
1738        let columns = alloc::vec![
1739            ColumnSchema::new("table_name", DataType::Text, false),
1740            ColumnSchema::new("column_name", DataType::Text, false),
1741            ColumnSchema::new("null_frac", DataType::Float, false),
1742            ColumnSchema::new("n_distinct", DataType::BigInt, false),
1743            ColumnSchema::new("histogram_bounds", DataType::Text, false),
1744            // v6.7.0 — appended column (v6.2.0 stability contract
1745            // allows APPEND to spg_statistic, not reorder/rename).
1746            // Reports the cached per-table cold-row count; same
1747            // value across every column row of the same table.
1748            ColumnSchema::new("cold_row_count", DataType::BigInt, false),
1749        ];
1750        let rows: Vec<Row> = self
1751            .statistics
1752            .iter()
1753            .map(|((t, c), s)| {
1754                let cold = self
1755                    .catalog
1756                    .get(t)
1757                    .map_or(0, |table| table.cold_row_count());
1758                Row::new(alloc::vec![
1759                    Value::Text(t.clone()),
1760                    Value::Text(c.clone()),
1761                    Value::Float(f64::from(s.null_frac)),
1762                    Value::BigInt(i64::try_from(s.n_distinct).unwrap_or(i64::MAX)),
1763                    Value::Text(render_histogram_bounds(&s.histogram_bounds)),
1764                    Value::BigInt(i64::try_from(cold).unwrap_or(i64::MAX)),
1765                ])
1766            })
1767            .collect();
1768        QueryResult::Rows { columns, rows }
1769    }
1770
1771    /// v6.5.0 — materialise `spg_stat_replication` rows. One row
1772    /// per subscription with `(name, conn_str, publications,
1773    /// last_received_pos, enabled)`. Surface mirrors
1774    /// `SHOW SUBSCRIPTIONS` but follows the virtual-table dispatch
1775    /// shape so it composes with SELECT clauses (WHERE, projection
1776    /// onto specific columns, etc).
1777    fn exec_spg_stat_replication(&self) -> QueryResult {
1778        let columns = alloc::vec![
1779            ColumnSchema::new("name", DataType::Text, false),
1780            ColumnSchema::new("conn_str", DataType::Text, false),
1781            ColumnSchema::new("publications", DataType::Text, false),
1782            ColumnSchema::new("last_received_pos", DataType::BigInt, false),
1783            ColumnSchema::new("enabled", DataType::Bool, false),
1784        ];
1785        let rows: Vec<Row> = self
1786            .subscriptions
1787            .iter()
1788            .map(|(name, sub)| {
1789                Row::new(alloc::vec![
1790                    Value::Text(name.clone()),
1791                    Value::Text(sub.conn_str.clone()),
1792                    Value::Text(sub.publications.join(",")),
1793                    Value::BigInt(i64::try_from(sub.last_received_pos).unwrap_or(i64::MAX)),
1794                    Value::Bool(sub.enabled),
1795                ])
1796            })
1797            .collect();
1798        QueryResult::Rows { columns, rows }
1799    }
1800
1801    /// v6.5.0 — materialise `spg_stat_segment` rows. One row per
1802    /// cold-tier segment with `(segment_id, num_rows, num_pages,
1803    /// total_bytes)`.
1804    ///
1805    /// v6.7.0 — appended `table_name` column resolves the v6.5.0
1806    /// carve-out. Walks every user table's BTree indices to find
1807    /// which table's Cold locators point at each segment. Empty
1808    /// string for orphan segments (loaded via SPG_PRELOAD_COLD_SEGMENT
1809    /// before any index registered a locator). The walk is
1810    /// O(tables × indices × keys); cached per call, not across
1811    /// calls — re-walked on every `SELECT * FROM spg_stat_segment`.
1812    fn exec_spg_stat_segment(&self) -> QueryResult {
1813        let columns = alloc::vec![
1814            ColumnSchema::new("segment_id", DataType::BigInt, false),
1815            ColumnSchema::new("table_name", DataType::Text, false),
1816            ColumnSchema::new("num_rows", DataType::BigInt, false),
1817            ColumnSchema::new("num_pages", DataType::BigInt, false),
1818            ColumnSchema::new("total_bytes", DataType::BigInt, false),
1819        ];
1820        // v6.7.0 — build a segment_id → table_name map by walking
1821        // every user table's BTree indices once. O(tables × indices
1822        // × keys) for the v6.5.0 carve-out resolution; acceptable
1823        // because spg_stat_segment is operator-facing (not on a
1824        // hot-loop path).
1825        let mut segment_owners: alloc::collections::BTreeMap<u32, String> = BTreeMap::new();
1826        for tname in self.catalog.table_names() {
1827            if is_internal_table_name(&tname) {
1828                continue;
1829            }
1830            let Some(t) = self.catalog.get(&tname) else {
1831                continue;
1832            };
1833            for idx in t.indices() {
1834                if let spg_storage::IndexKind::BTree(map) = &idx.kind {
1835                    for (_, locs) in map.iter() {
1836                        for loc in locs {
1837                            if let spg_storage::RowLocator::Cold { segment_id, .. } = loc {
1838                                segment_owners
1839                                    .entry(*segment_id)
1840                                    .or_insert_with(|| tname.clone());
1841                            }
1842                        }
1843                    }
1844                }
1845            }
1846        }
1847        let rows: Vec<Row> = self
1848            .catalog
1849            .cold_segment_ids_global()
1850            .iter()
1851            .filter_map(|&id| {
1852                let seg = self.catalog.cold_segment(id)?;
1853                let meta = seg.meta();
1854                let owner = segment_owners.get(&id).cloned().unwrap_or_default();
1855                Some(Row::new(alloc::vec![
1856                    Value::BigInt(i64::from(id)),
1857                    Value::Text(owner),
1858                    Value::BigInt(i64::try_from(meta.num_rows).unwrap_or(i64::MAX)),
1859                    Value::BigInt(i64::from(meta.num_pages)),
1860                    Value::BigInt(i64::try_from(meta.total_bytes).unwrap_or(i64::MAX)),
1861                ]))
1862            })
1863            .collect();
1864        QueryResult::Rows { columns, rows }
1865    }
1866
1867    /// v6.5.1 — materialise `spg_stat_query` rows. One row per
1868    /// distinct SQL text recorded since the engine booted, capped
1869    /// at `QUERY_STATS_MAX` (1024). Columns:
1870    ///   sql, exec_count, total_us, mean_us, max_us, last_seen_us
1871    /// mean_us = total_us / exec_count (saturating).
1872    fn exec_spg_stat_query(&self) -> QueryResult {
1873        let columns = alloc::vec![
1874            ColumnSchema::new("sql", DataType::Text, false),
1875            ColumnSchema::new("exec_count", DataType::BigInt, false),
1876            ColumnSchema::new("total_us", DataType::BigInt, false),
1877            ColumnSchema::new("mean_us", DataType::BigInt, false),
1878            ColumnSchema::new("max_us", DataType::BigInt, false),
1879            ColumnSchema::new("last_seen_us", DataType::BigInt, false),
1880        ];
1881        let rows: Vec<Row> = self
1882            .query_stats
1883            .snapshot()
1884            .into_iter()
1885            .map(|(sql, s)| {
1886                let mean = if s.exec_count == 0 {
1887                    0
1888                } else {
1889                    s.total_us / s.exec_count
1890                };
1891                Row::new(alloc::vec![
1892                    Value::Text(sql),
1893                    Value::BigInt(i64::try_from(s.exec_count).unwrap_or(i64::MAX)),
1894                    Value::BigInt(i64::try_from(s.total_us).unwrap_or(i64::MAX)),
1895                    Value::BigInt(i64::try_from(mean).unwrap_or(i64::MAX)),
1896                    Value::BigInt(i64::try_from(s.max_us).unwrap_or(i64::MAX)),
1897                    Value::BigInt(i64::try_from(s.last_seen_us).unwrap_or(i64::MAX)),
1898                ])
1899            })
1900            .collect();
1901        QueryResult::Rows { columns, rows }
1902    }
1903
    /// v6.5.2 — register a connection-state provider (builder style).
    ///
    /// Stores `f` in `activity_provider` and returns the engine by
    /// value, so calls can be chained. spg-server calls this at
    /// startup with a function that snapshots its
    /// per-pgwire-connection registry; the engine reads through the
    /// callback on `SELECT * FROM spg_stat_activity`.
    #[must_use]
    pub const fn with_activity_provider(mut self, f: ActivityProvider) -> Self {
        self.activity_provider = Some(f);
        self
    }
1913
    /// v6.5.3 — register the audit chain provider and the audit
    /// verifier (builder style).
    ///
    /// Both callbacks are always set together: `chain` goes into
    /// `audit_chain_provider`, `verify` into `audit_verifier`. The
    /// engine is returned by value so calls can be chained.
    #[must_use]
    pub const fn with_audit_providers(
        mut self,
        chain: AuditChainProvider,
        verify: AuditVerifier,
    ) -> Self {
        self.audit_chain_provider = Some(chain);
        self.audit_verifier = Some(verify);
        self
    }
1925
    /// v6.5.6 — register a slow-query log callback (builder style).
    ///
    /// `threshold_us` is the floor in microseconds; `logger` is the
    /// callback to fire for executions that reach it. Both are stored
    /// together and the engine is returned by value so calls can be
    /// chained. spg-server wires this from
    /// `SPG_SLOW_QUERY_THRESHOLD_MS` (default 100 ms).
    #[must_use]
    pub const fn with_slow_query_log(mut self, threshold_us: u64, logger: SlowQueryLogger) -> Self {
        self.slow_query_threshold_us = Some(threshold_us);
        self.slow_query_logger = Some(logger);
        self
    }
1936
    /// v6.5.6 — operator knob for the plan cache cap.
    ///
    /// Forwards `n` to `set_max_entries` on the plan cache. spg-server
    /// reads the `SPG_PLAN_CACHE_MAX` env var at startup and uses this
    /// to override the compile-time default of 256.
    pub fn set_plan_cache_max(&mut self, n: usize) {
        self.plan_cache.set_max_entries(n);
    }
1943
1944    /// v6.5.2 — materialise `spg_stat_activity` rows. Pulls a fresh
1945    /// snapshot from the registered `ActivityProvider`. Returns an
1946    /// empty result set when no provider is registered (the no_std
1947    /// embedded path with no pgwire layer).
1948    fn exec_spg_stat_activity(&self) -> QueryResult {
1949        let columns = alloc::vec![
1950            ColumnSchema::new("pid", DataType::Int, false),
1951            ColumnSchema::new("user", DataType::Text, false),
1952            ColumnSchema::new("started_at_us", DataType::BigInt, false),
1953            ColumnSchema::new("current_sql", DataType::Text, false),
1954            ColumnSchema::new("wait_event", DataType::Text, false),
1955            ColumnSchema::new("elapsed_us", DataType::BigInt, false),
1956            ColumnSchema::new("in_transaction", DataType::Bool, false),
1957        ];
1958        let rows: Vec<Row> = self
1959            .activity_provider
1960            .map(|f| f())
1961            .unwrap_or_default()
1962            .into_iter()
1963            .map(|r| {
1964                Row::new(alloc::vec![
1965                    Value::Int(i32::try_from(r.pid).unwrap_or(i32::MAX)),
1966                    Value::Text(r.user),
1967                    Value::BigInt(r.started_at_us),
1968                    Value::Text(r.current_sql),
1969                    Value::Text(r.wait_event),
1970                    Value::BigInt(r.elapsed_us),
1971                    Value::Bool(r.in_transaction),
1972                ])
1973            })
1974            .collect();
1975        QueryResult::Rows { columns, rows }
1976    }
1977
1978    /// v6.5.4 — materialise `spg_table_ddl` rows. One row per user
1979    /// table with `(table_name, ddl)`. Reconstructed from catalog
1980    /// state on demand.
1981    fn exec_spg_table_ddl(&self) -> QueryResult {
1982        let columns = alloc::vec![
1983            ColumnSchema::new("table_name", DataType::Text, false),
1984            ColumnSchema::new("ddl", DataType::Text, false),
1985        ];
1986        let rows: Vec<Row> = self
1987            .catalog
1988            .table_names()
1989            .into_iter()
1990            .filter(|n| !is_internal_table_name(n))
1991            .filter_map(|name| {
1992                let table = self.catalog.get(&name)?;
1993                let ddl = render_create_table(&name, &table.schema().columns);
1994                Some(Row::new(alloc::vec![Value::Text(name), Value::Text(ddl),]))
1995            })
1996            .collect();
1997        QueryResult::Rows { columns, rows }
1998    }
1999
2000    /// v6.5.4 — materialise `spg_role_ddl` rows. One row per user
2001    /// with `(role_name, ddl)`. Password is redacted (matches the
2002    /// `Statement::CreateUser` Display which prints `'<redacted>'`).
2003    fn exec_spg_role_ddl(&self) -> QueryResult {
2004        let columns = alloc::vec![
2005            ColumnSchema::new("role_name", DataType::Text, false),
2006            ColumnSchema::new("ddl", DataType::Text, false),
2007        ];
2008        let rows: Vec<Row> = self
2009            .users
2010            .iter()
2011            .map(|(name, rec)| {
2012                let ddl = alloc::format!(
2013                    "CREATE USER {name} WITH PASSWORD '<redacted>' ROLE '{}'",
2014                    rec.role.as_str(),
2015                );
2016                Row::new(alloc::vec![
2017                    Value::Text(String::from(name)),
2018                    Value::Text(ddl)
2019                ])
2020            })
2021            .collect();
2022        QueryResult::Rows { columns, rows }
2023    }
2024
2025    /// v6.5.4 — materialise `spg_database_ddl`: single row whose
2026    /// `ddl` column concatenates every user table's CREATE +
2027    /// every role's CREATE in deterministic catalog order. Suitable
2028    /// for piping back through `Engine::execute` to recreate a
2029    /// schema-equivalent database.
2030    fn exec_spg_database_ddl(&self) -> QueryResult {
2031        let columns = alloc::vec![ColumnSchema::new("ddl", DataType::Text, false)];
2032        let mut out = String::new();
2033        for (name, rec) in self.users.iter() {
2034            out.push_str(&alloc::format!(
2035                "CREATE USER {name} WITH PASSWORD '<redacted>' ROLE '{}';\n",
2036                rec.role.as_str(),
2037            ));
2038        }
2039        for name in self.catalog.table_names() {
2040            if is_internal_table_name(&name) {
2041                continue;
2042            }
2043            if let Some(table) = self.catalog.get(&name) {
2044                out.push_str(&render_create_table(&name, &table.schema().columns));
2045                out.push_str(";\n");
2046            }
2047        }
2048        QueryResult::Rows {
2049            columns,
2050            rows: alloc::vec![Row::new(alloc::vec![Value::Text(out)])],
2051        }
2052    }
2053
2054    /// v6.5.3 — materialise `spg_audit_chain` rows. Pulls a fresh
2055    /// snapshot from the registered provider; empty when no
2056    /// provider is set.
2057    fn exec_spg_audit_chain(&self) -> QueryResult {
2058        let columns = alloc::vec![
2059            ColumnSchema::new("seq", DataType::BigInt, false),
2060            ColumnSchema::new("ts_ms", DataType::BigInt, false),
2061            ColumnSchema::new("prev_hash", DataType::Text, false),
2062            ColumnSchema::new("entry_hash", DataType::Text, false),
2063            ColumnSchema::new("sql", DataType::Text, false),
2064        ];
2065        let rows: Vec<Row> = self
2066            .audit_chain_provider
2067            .map(|f| f())
2068            .unwrap_or_default()
2069            .into_iter()
2070            .map(|r| {
2071                Row::new(alloc::vec![
2072                    Value::BigInt(r.seq),
2073                    Value::BigInt(r.ts_ms),
2074                    Value::Text(r.prev_hash_hex),
2075                    Value::Text(r.entry_hash_hex),
2076                    Value::Text(r.sql),
2077                ])
2078            })
2079            .collect();
2080        QueryResult::Rows { columns, rows }
2081    }
2082
2083    /// v6.5.3 — materialise `spg_audit_verify` single-row result.
2084    /// `(verified_count, broken_at_seq)` — broken_at_seq is `-1`
2085    /// on a clean chain. Returns one row with both values 0 when
2086    /// no verifier is registered (no-data fallback for embedded
2087    /// callers).
2088    fn exec_spg_audit_verify(&self) -> QueryResult {
2089        let columns = alloc::vec![
2090            ColumnSchema::new("verified_count", DataType::BigInt, false),
2091            ColumnSchema::new("broken_at_seq", DataType::BigInt, false),
2092        ];
2093        let (verified, broken) = self.audit_verifier.map(|f| f()).unwrap_or((0, -1));
2094        let row = Row::new(alloc::vec![Value::BigInt(verified), Value::BigInt(broken),]);
2095        QueryResult::Rows {
2096            columns,
2097            rows: alloc::vec![row],
2098        }
2099    }
2100
    /// v6.5.1 — shared (read-only) access to the per-query statistics.
    ///
    /// Returns a borrow of the `query_stats` field. Intended for tests
    /// and the v6.5.6 ops resets.
    pub fn query_stats(&self) -> &query_stats::QueryStats {
        &self.query_stats
    }
2105
    /// v6.5.1 — exclusive (mutable) access to the per-query statistics
    /// (clear, etc).
    ///
    /// Returns a mutable borrow of the `query_stats` field.
    pub fn query_stats_mut(&mut self) -> &mut query_stats::QueryStats {
        &mut self.query_stats
    }
2110
    /// v6.2.0 — read access to the per-column statistics table.
    ///
    /// Returns a shared borrow of the `statistics` field. Used by the
    /// planner (v6.2.2 selectivity functions read this), by
    /// `SELECT * FROM spg_statistic`, and by e2e tests.
    pub const fn statistics(&self) -> &statistics::Statistics {
        &self.statistics
    }
2117
2118    /// v6.2.1 — return tables whose modified-row count crossed the
2119    /// auto-analyze threshold since the last ANALYZE on that table.
2120    /// The threshold is `0.1 × max(row_count, MIN_ROWS_FOR_AUTO_
2121    /// ANALYZE)` — combines PG-style fractional + absolute lower
2122    /// bound so a fresh / tiny table doesn't get hammered on every
2123    /// INSERT.
2124    ///
2125    /// Designed to be cheap: walks every user table's
2126    /// `Catalog::table_names()` + reads `statistics::modified_
2127    /// since_last_analyze()` (BTreeMap lookup). The background
2128    /// worker calls this under `engine.read()` then drops the lock
2129    /// before re-acquiring `engine.write()` for the actual ANALYZE.
2130    pub fn tables_needing_analyze(&self) -> Vec<String> {
2131        const MIN_ROWS: u64 = 100;
2132        let mut out = Vec::new();
2133        for name in self.catalog.table_names() {
2134            if is_internal_table_name(&name) {
2135                continue;
2136            }
2137            let Some(table) = self.catalog.get(&name) else {
2138                continue;
2139            };
2140            let row_count = table.rows().len() as u64;
2141            let modified = self.statistics.modified_since_last_analyze(&name);
2142            // Threshold: ceil(0.1 × max(row_count, MIN_ROWS)),
2143            // computed in integer arithmetic so spg-engine stays
2144            // no_std without pulling in libm. `(n + 9) / 10` is
2145            // `ceil(n / 10)` for non-negative `n`.
2146            let base = row_count.max(MIN_ROWS);
2147            let threshold = base.saturating_add(9) / 10;
2148            if modified >= threshold {
2149                out.push(name);
2150            }
2151        }
2152        out
2153    }
2154
2155    /// v6.2.0 — `ANALYZE [<table>]` runtime. Bare `ANALYZE` walks
2156    /// every user table; `ANALYZE <name>` re-stats one. For each
2157    /// target table, single-pass scan + per-column histogram +
2158    /// `null_frac` + `n_distinct`. Replaces the table's prior
2159    /// stats; resets the modified-row counter.
2160    ///
2161    /// v6.2.0 doesn't sample — it scans the full table. v6.2.x
2162    /// can add reservoir sampling at the > 100 K-row mark; not a
2163    /// scope blocker for the current commit since rows ≤ 100 K
2164    /// analyse in milliseconds.
2165    fn exec_analyze(&mut self, target: Option<&str>) -> Result<QueryResult, EngineError> {
2166        let names: Vec<String> = if let Some(name) = target {
2167            // Verify the table exists; surface a clear error if not.
2168            if self.catalog.get(name).is_none() {
2169                return Err(EngineError::Storage(StorageError::TableNotFound {
2170                    name: name.to_string(),
2171                }));
2172            }
2173            alloc::vec![name.to_string()]
2174        } else {
2175            self.catalog
2176                .table_names()
2177                .into_iter()
2178                .filter(|n| !is_internal_table_name(n))
2179                .collect()
2180        };
2181        let mut analysed = 0usize;
2182        for table_name in &names {
2183            self.analyze_one_table(table_name)?;
2184            analysed += 1;
2185        }
2186        // v6.3.1 — plan cache invalidation. Bump stats version so
2187        // future lookups see the new generation, and selectively
2188        // evict every plan whose `source_tables` overlap with the
2189        // ANALYZE target set. Bare ANALYZE (all tables) clears the
2190        // whole cache.
2191        if analysed > 0 {
2192            self.statistics.bump_version();
2193            if target.is_some() {
2194                for t in &names {
2195                    self.plan_cache.evict_referencing(t);
2196                }
2197            } else {
2198                self.plan_cache.clear();
2199            }
2200        }
2201        Ok(QueryResult::CommandOk {
2202            affected: analysed,
2203            modified_catalog: true,
2204        })
2205    }
2206
2207    /// v6.7.3 — `COMPACT COLD SEGMENTS` runtime path. Drives the
2208    /// engine-layer compaction shim with the default
2209    /// 4 MiB segment-size threshold. spg-server intercepts the
2210    /// SQL before it reaches the engine on a server build —
2211    /// it reads `SPG_COMPACTION_TARGET_SEGMENT_BYTES`, calls
2212    /// `Engine::compact_cold_segments_with_target` directly with
2213    /// the env value, and persists every merged segment to
2214    /// v7.12.1 — record a `SET <name> = <value>` parameter. Names
2215    /// are case-folded to lowercase to match PG; values keep their
2216    /// caller-supplied form so observability paths see what was
2217    /// requested. Only `default_text_search_config` is consulted by
2218    /// the engine today.
2219    fn set_session_param(&mut self, name: String, value: spg_sql::ast::SetValue) {
2220        let normalised = match value {
2221            spg_sql::ast::SetValue::String(s) => s,
2222            spg_sql::ast::SetValue::Ident(s) => s,
2223            spg_sql::ast::SetValue::Number(s) => s,
2224            spg_sql::ast::SetValue::Default => String::new(),
2225        };
2226        self.session_params
2227            .insert(name.to_ascii_lowercase(), normalised);
2228    }
2229
2230    /// v7.12.1 — read a session parameter set via `SET`. Used by
2231    /// the FTS function dispatcher to resolve the default config
2232    /// for `to_tsvector(text)` / `plainto_tsquery(text)` etc.
2233    #[must_use]
2234    pub fn session_param(&self, name: &str) -> Option<&str> {
2235        self.session_params
2236            .get(&name.to_ascii_lowercase())
2237            .map(String::as_str)
2238    }
2239
2240    /// v7.12.1 — build an `EvalContext` chained with the session's
2241    /// `default_text_search_config`. Engine-internal callers use
2242    /// this instead of `EvalContext::new` so the FTS function
2243    /// dispatcher sees the SET configuration.
2244    fn ev_ctx<'a>(
2245        &'a self,
2246        columns: &'a [ColumnSchema],
2247        alias: Option<&'a str>,
2248    ) -> EvalContext<'a> {
2249        EvalContext::new(columns, alias)
2250            .with_default_text_search_config(self.session_param("default_text_search_config"))
2251    }
2252
2253    /// `<db>.spg/segments/`. This arm only fires for engine-only
2254    /// callers (spg-embedded, lib tests); in that mode merged
2255    /// segments live in memory and are dropped at process exit.
2256    fn exec_compact_cold_segments(&mut self) -> Result<QueryResult, EngineError> {
2257        let target = COMPACTION_TARGET_DEFAULT_BYTES;
2258        let reports = self.compact_cold_segments_with_target(target)?;
2259        let columns = alloc::vec![
2260            ColumnSchema::new("table_name", DataType::Text, false),
2261            ColumnSchema::new("index_name", DataType::Text, false),
2262            ColumnSchema::new("sources_merged", DataType::BigInt, false),
2263            ColumnSchema::new("merged_segment_id", DataType::BigInt, false),
2264            ColumnSchema::new("merged_rows", DataType::BigInt, false),
2265            ColumnSchema::new("deleted_rows_pruned", DataType::BigInt, false),
2266            ColumnSchema::new("bytes_reclaimed_estimate", DataType::BigInt, false),
2267        ];
2268        let rows: Vec<Row> = reports
2269            .into_iter()
2270            .map(|(tname, iname, report)| {
2271                Row::new(alloc::vec![
2272                    Value::Text(tname),
2273                    Value::Text(iname),
2274                    Value::BigInt(i64::try_from(report.sources.len()).unwrap_or(i64::MAX)),
2275                    Value::BigInt(i64::from(report.merged_segment_id.unwrap_or(0))),
2276                    Value::BigInt(i64::try_from(report.merged_rows).unwrap_or(i64::MAX)),
2277                    Value::BigInt(i64::try_from(report.deleted_rows_pruned).unwrap_or(i64::MAX),),
2278                    Value::BigInt(
2279                        i64::try_from(report.bytes_reclaimed_estimate).unwrap_or(i64::MAX),
2280                    ),
2281                ])
2282            })
2283            .collect();
2284        Ok(QueryResult::Rows { columns, rows })
2285    }
2286
2287    /// Walk a single table's rows once and (re-)populate per-column
2288    /// stats. Drops the existing stats for `table` first so columns
2289    /// that have been DROP-ed between ANALYZEs don't leave stale
2290    /// rows.
2291    fn analyze_one_table(&mut self, table_name: &str) -> Result<(), EngineError> {
2292        let table = self.catalog.get(table_name).ok_or_else(|| {
2293            EngineError::Storage(StorageError::TableNotFound {
2294                name: table_name.to_string(),
2295            })
2296        })?;
2297        let schema = table.schema().clone();
2298        let row_count = table.rows().len();
2299        // For each column, collect (sorted) non-NULL textual values
2300        // + count NULLs; then ask `statistics::build_histogram` to
2301        // produce the 101 bounds and `estimate_n_distinct` the
2302        // distinct count.
2303        self.statistics.clear_table(table_name);
2304        for (col_pos, col_schema) in schema.columns.iter().enumerate() {
2305            // v6.2.0 skip: vector columns have their own stats
2306            // shape (HNSW graph topology). v6.2 deliberation #1.
2307            if matches!(col_schema.ty, DataType::Vector { .. }) {
2308                continue;
2309            }
2310            let mut non_null_values: Vec<Value> = Vec::with_capacity(row_count);
2311            let mut nulls: u64 = 0;
2312            for row in table.rows() {
2313                match row.values.get(col_pos) {
2314                    Some(Value::Null) | None => nulls += 1,
2315                    Some(v) => non_null_values.push(v.clone()),
2316                }
2317            }
2318            // Sort by type-aware ordering (Int as int, Text as
2319            // lex, etc.) so histogram bounds reflect the column's
2320            // natural order — not lexicographic on the string
2321            // representation, which would put "9" after "49".
2322            non_null_values.sort_by(|a, b| sort_values_for_histogram(a, b));
2323            let non_null: Vec<String> = non_null_values.iter().map(canonical_value_repr).collect();
2324            let null_frac = if row_count == 0 {
2325                0.0
2326            } else {
2327                #[allow(clippy::cast_precision_loss)]
2328                let f = nulls as f32 / row_count as f32;
2329                f
2330            };
2331            let n_distinct = statistics::estimate_n_distinct(&non_null);
2332            let histogram_bounds = statistics::build_histogram(&non_null);
2333            self.statistics.set(
2334                table_name.to_string(),
2335                col_schema.name.clone(),
2336                statistics::ColumnStats {
2337                    null_frac,
2338                    n_distinct,
2339                    histogram_bounds,
2340                },
2341            );
2342        }
2343        self.statistics.reset_modified(table_name);
2344        // v6.7.0 — refresh the per-table cold_rows cache. Walk the
2345        // BTree indices and count Cold locators (MAX across
2346        // indices); store the result on the table. Surfaced via
2347        // `spg_statistic.cold_row_count` (new column) and
2348        // `spg_stat_segment.table_name` (new column).
2349        let cold_count = {
2350            let table = self
2351                .active_catalog()
2352                .get(table_name)
2353                .expect("table still present");
2354            table.count_cold_locators()
2355        };
2356        let table_mut = self
2357            .active_catalog_mut()
2358            .get_mut(table_name)
2359            .expect("table still present");
2360        table_mut.set_cold_row_count(cold_count);
2361        Ok(())
2362    }
2363
2364    /// v6.1.3 — `SHOW PUBLICATIONS` row materialisation. Returns
2365    /// `(name, scope, table_count)` ordered by publication name.
2366    ///   - `scope` is the human-readable string:
2367    ///       `"FOR ALL TABLES"` /
2368    ///       `"FOR TABLE t1, t2"` /
2369    ///       `"FOR ALL TABLES EXCEPT t1, t2"`.
2370    ///   - `table_count` is NULL for `AllTables`, the list length
2371    ///     otherwise. NULLability lets clients distinguish "publish
2372    ///     everything" from "publish exactly 0 tables" (the v6.1.3
2373    ///     parser forbids the empty list, but the column shape is
2374    ///     ready for the v6.1.5 publisher-side semantics).
2375    fn exec_show_publications(&self) -> QueryResult {
2376        let columns = alloc::vec![
2377            ColumnSchema::new("name", DataType::Text, false),
2378            ColumnSchema::new("scope", DataType::Text, false),
2379            ColumnSchema::new("table_count", DataType::Int, true),
2380        ];
2381        let rows: Vec<Row> = self
2382            .publications
2383            .iter()
2384            .map(|(name, scope)| {
2385                let (scope_str, count_val) = match scope {
2386                    spg_sql::ast::PublicationScope::AllTables => {
2387                        ("FOR ALL TABLES".to_string(), Value::Null)
2388                    }
2389                    spg_sql::ast::PublicationScope::ForTables(ts) => (
2390                        alloc::format!("FOR TABLE {}", ts.join(", ")),
2391                        Value::Int(i32::try_from(ts.len()).unwrap_or(i32::MAX)),
2392                    ),
2393                    spg_sql::ast::PublicationScope::AllTablesExcept(ts) => (
2394                        alloc::format!("FOR ALL TABLES EXCEPT {}", ts.join(", ")),
2395                        Value::Int(i32::try_from(ts.len()).unwrap_or(i32::MAX)),
2396                    ),
2397                };
2398                Row::new(alloc::vec![
2399                    Value::Text(name.clone()),
2400                    Value::Text(scope_str),
2401                    count_val,
2402                ])
2403            })
2404            .collect();
2405        QueryResult::Rows { columns, rows }
2406    }
2407
2408    /// v4.1 `SHOW USERS` — `(name, role)` per row, ordered by name.
2409    fn exec_show_users(&self) -> QueryResult {
2410        let columns = alloc::vec![
2411            ColumnSchema::new("name", DataType::Text, false),
2412            ColumnSchema::new("role", DataType::Text, false),
2413        ];
2414        let rows: Vec<Row> = self
2415            .users
2416            .iter()
2417            .map(|(name, rec)| {
2418                Row::new(alloc::vec![
2419                    Value::Text(name.to_string()),
2420                    Value::Text(rec.role.as_str().to_string()),
2421                ])
2422            })
2423            .collect();
2424        QueryResult::Rows { columns, rows }
2425    }
2426
2427    fn exec_create_user(&mut self, s: &CreateUserStatement) -> Result<QueryResult, EngineError> {
2428        if self.in_transaction() {
2429            return Err(EngineError::Unsupported(
2430                "CREATE USER is not allowed inside a transaction".into(),
2431            ));
2432        }
2433        let role = users::Role::parse(&s.role).ok_or_else(|| {
2434            EngineError::Unsupported(alloc::format!("invalid role: {:?}", s.role))
2435        })?;
2436        // Prefer the host-injected RNG. Falls back to a deterministic
2437        // salt derived from the username only when no RNG is wired —
2438        // acceptable for tests; the server always installs one.
2439        let salt = self.salt_fn.map_or_else(
2440            || {
2441                let mut s_bytes = [0u8; 16];
2442                let digest = spg_crypto::hash(s.name.as_bytes());
2443                s_bytes.copy_from_slice(&digest[..16]);
2444                s_bytes
2445            },
2446            |f| f(),
2447        );
2448        self.users
2449            .create(&s.name, &s.password, role, salt)
2450            .map_err(|e| EngineError::Unsupported(alloc::format!("CREATE USER: {e}")))?;
2451        Ok(QueryResult::CommandOk {
2452            affected: 1,
2453            modified_catalog: true,
2454        })
2455    }
2456
2457    fn exec_drop_user(&mut self, name: &str) -> Result<QueryResult, EngineError> {
2458        if self.in_transaction() {
2459            return Err(EngineError::Unsupported(
2460                "DROP USER is not allowed inside a transaction".into(),
2461            ));
2462        }
2463        self.users
2464            .drop(name)
2465            .map_err(|e| EngineError::Unsupported(alloc::format!("DROP USER: {e}")))?;
2466        Ok(QueryResult::CommandOk {
2467            affected: 1,
2468            modified_catalog: true,
2469        })
2470    }
2471
2472    /// v7.12.4 — `CREATE [OR REPLACE] FUNCTION`. Stores the
2473    /// function metadata in the catalog. PL/pgSQL bodies are
2474    /// already parsed by the SQL parser; we re-canonicalise the
2475    /// body to source text for storage (the executor re-parses
2476    /// it at trigger fire time — see the trigger fire path).
2477    fn exec_create_function(
2478        &mut self,
2479        s: spg_sql::ast::CreateFunctionStatement,
2480    ) -> Result<QueryResult, EngineError> {
2481        let args_repr = render_function_args(&s.args);
2482        let returns = match &s.returns {
2483            spg_sql::ast::FunctionReturn::Trigger => alloc::string::String::from("TRIGGER"),
2484            spg_sql::ast::FunctionReturn::Void => alloc::string::String::from("VOID"),
2485            spg_sql::ast::FunctionReturn::Type(t) => alloc::format!("{t}"),
2486            spg_sql::ast::FunctionReturn::Other(s) => s.clone(),
2487        };
2488        let body_text = match &s.body {
2489            spg_sql::ast::FunctionBody::PlPgSql(b) => alloc::format!("{b}"),
2490            spg_sql::ast::FunctionBody::Raw(s) => s.clone(),
2491        };
2492        let def = spg_storage::FunctionDef {
2493            name: s.name.clone(),
2494            args_repr,
2495            returns,
2496            language: s.language.clone(),
2497            body: body_text,
2498        };
2499        self.active_catalog_mut()
2500            .create_function(def, s.or_replace)
2501            .map_err(EngineError::Storage)?;
2502        Ok(QueryResult::CommandOk {
2503            affected: 0,
2504            modified_catalog: true,
2505        })
2506    }
2507
2508    /// v7.12.4 — `CREATE [OR REPLACE] TRIGGER`. The referenced
2509    /// function must already exist in the catalog (forward
2510    /// references defer to a later release). Persists the
2511    /// trigger metadata for the row-write hooks below to consult.
2512    fn exec_create_trigger(
2513        &mut self,
2514        s: spg_sql::ast::CreateTriggerStatement,
2515    ) -> Result<QueryResult, EngineError> {
2516        let timing = match s.timing {
2517            spg_sql::ast::TriggerTiming::Before => "BEFORE",
2518            spg_sql::ast::TriggerTiming::After => "AFTER",
2519            spg_sql::ast::TriggerTiming::InsteadOf => "INSTEAD OF",
2520        };
2521        let events: Vec<alloc::string::String> = s
2522            .events
2523            .iter()
2524            .map(|e| match e {
2525                spg_sql::ast::TriggerEvent::Insert => alloc::string::String::from("INSERT"),
2526                spg_sql::ast::TriggerEvent::Update => alloc::string::String::from("UPDATE"),
2527                spg_sql::ast::TriggerEvent::Delete => alloc::string::String::from("DELETE"),
2528                spg_sql::ast::TriggerEvent::Truncate => alloc::string::String::from("TRUNCATE"),
2529            })
2530            .collect();
2531        let for_each = match s.for_each {
2532            spg_sql::ast::TriggerForEach::Row => "ROW",
2533            spg_sql::ast::TriggerForEach::Statement => "STATEMENT",
2534        };
2535        let def = spg_storage::TriggerDef {
2536            name: s.name.clone(),
2537            table: s.table.clone(),
2538            timing: alloc::string::String::from(timing),
2539            events,
2540            for_each: alloc::string::String::from(for_each),
2541            function: s.function.clone(),
2542            update_columns: s.update_columns.clone(),
2543        };
2544        self.active_catalog_mut()
2545            .create_trigger(def, s.or_replace)
2546            .map_err(EngineError::Storage)?;
2547        Ok(QueryResult::CommandOk {
2548            affected: 0,
2549            modified_catalog: true,
2550        })
2551    }
2552
2553    fn exec_drop_trigger(
2554        &mut self,
2555        name: &str,
2556        table: &str,
2557        if_exists: bool,
2558    ) -> Result<QueryResult, EngineError> {
2559        let removed = self.active_catalog_mut().drop_trigger(name, table);
2560        if !removed && !if_exists {
2561            return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
2562                alloc::format!("trigger {name:?} on {table:?} does not exist"),
2563            )));
2564        }
2565        Ok(QueryResult::CommandOk {
2566            affected: usize::from(removed),
2567            modified_catalog: removed,
2568        })
2569    }
2570
2571    fn exec_drop_function(
2572        &mut self,
2573        name: &str,
2574        if_exists: bool,
2575    ) -> Result<QueryResult, EngineError> {
2576        let removed = self.active_catalog_mut().drop_function(name);
2577        if !removed && !if_exists {
2578            return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
2579                alloc::format!("function {name:?} does not exist"),
2580            )));
2581        }
2582        Ok(QueryResult::CommandOk {
2583            affected: usize::from(removed),
2584            modified_catalog: removed,
2585        })
2586    }
2587
2588    /// v4.4 `UPDATE <table> SET col = expr [, ...] [WHERE cond]`.
2589    /// Filter pass uses the same WHERE eval as `exec_select`. Per
2590    /// matched row, evaluate each RHS expression against the *old*
2591    /// row, then call `Table::update_row` which rebuilds indices.
2592    /// Indexed columns are correctly reflected because rebuild
2593    /// happens after the cell rewrite.
2594    fn exec_update_cancel(
2595        &mut self,
2596        stmt: &spg_sql::ast::UpdateStatement,
2597        cancel: CancelToken<'_>,
2598    ) -> Result<QueryResult, EngineError> {
2599        // v7.12.5 — snapshot BEFORE/AFTER UPDATE row triggers + the
2600        // session FTS config before the table mut-borrow opens (the
2601        // INSERT path uses the same pattern). Empty vecs are the
2602        // common "no triggers on this table" fast path.
2603        // v7.13.0 — UPDATE triggers carry an optional `UPDATE OF
2604        // cols` filter. The filter is paired with each function so
2605        // the per-row fire loop can skip when no listed column
2606        // actually differs between OLD and NEW.
2607        let before_update_triggers = self.snapshot_update_row_triggers(&stmt.table, "BEFORE");
2608        let after_update_triggers = self.snapshot_update_row_triggers(&stmt.table, "AFTER");
2609        let trigger_session_cfg: Option<String> = self
2610            .session_params
2611            .get("default_text_search_config")
2612            .cloned();
2613        // v5.2.3: if the WHERE is a PK equality and matches a cold-
2614        // tier row, promote it back to the hot tier *before* the
2615        // hot-row walk. The promote pushes the row to the end of
2616        // `table.rows`, where the upcoming SET-evaluation loop will
2617        // pick it up and apply the assignments. Lookups for the key
2618        // never observe a gap because `promote_cold_row` inserts the
2619        // hot row before retiring the cold locator.
2620        if let Some(w) = &stmt.where_ {
2621            let schema_cols = self
2622                .active_catalog()
2623                .get(&stmt.table)
2624                .ok_or_else(|| {
2625                    EngineError::Storage(StorageError::TableNotFound {
2626                        name: stmt.table.clone(),
2627                    })
2628                })?
2629                .schema()
2630                .columns
2631                .clone();
2632            if let Some((col_pos, key)) = try_pk_predicate(w, &schema_cols, stmt.table.as_str())
2633                && let Some(idx_name) = self
2634                    .active_catalog()
2635                    .get(&stmt.table)
2636                    .and_then(|t| t.index_on(col_pos).map(|i| i.name.clone()))
2637            {
2638                // Promote may be a no-op (key is hot-only or absent);
2639                // we don't care about the return value here — the
2640                // subsequent hot walk will either match or not.
2641                let _ = self
2642                    .active_catalog_mut()
2643                    .promote_cold_row(&stmt.table, &idx_name, &key);
2644            }
2645        }
2646
2647        // v7.12.1 — cache session FTS config before the table
2648        // mut-borrow (same reason as exec_delete).
2649        let ts_cfg: Option<String> = self
2650            .session_param("default_text_search_config")
2651            .map(String::from);
2652        let table = self
2653            .active_catalog_mut()
2654            .get_mut(&stmt.table)
2655            .ok_or_else(|| {
2656                EngineError::Storage(StorageError::TableNotFound {
2657                    name: stmt.table.clone(),
2658                })
2659            })?;
2660        let schema_cols: Vec<ColumnSchema> = table.schema().columns.clone();
2661        // Resolve each SET target to a column position once, validate
2662        // up front so a typo'd column doesn't leave a partial mutation
2663        // behind.
2664        let mut targets: Vec<(usize, &Expr)> = Vec::with_capacity(stmt.assignments.len());
2665        for (col, expr) in &stmt.assignments {
2666            let pos = schema_cols
2667                .iter()
2668                .position(|c| c.name == *col)
2669                .ok_or_else(|| {
2670                    EngineError::Eval(EvalError::ColumnNotFound { name: col.clone() })
2671                })?;
2672            targets.push((pos, expr));
2673        }
2674        let ctx = EvalContext::new(&schema_cols, Some(stmt.table.as_str()))
2675            .with_default_text_search_config(ts_cfg.as_deref());
2676        // Walk every row, evaluate WHERE then SET expressions. We
2677        // gather (position, new_values) tuples first and apply them
2678        // afterwards so the WHERE/RHS evaluation reads the original
2679        // row state — matches PG semantics (UPDATE doesn't see its
2680        // own writes).
2681        let mut planned: Vec<(usize, Vec<Value>)> = Vec::new();
2682        for (i, row) in table.rows().iter().enumerate() {
2683            // v4.5: cooperative cancel checkpoint every 256 rows so
2684            // a runaway UPDATE without WHERE doesn't drag past the
2685            // server's query-timeout watchdog.
2686            if i.is_multiple_of(256) {
2687                cancel.check()?;
2688            }
2689            if let Some(w) = &stmt.where_ {
2690                let cond = eval::eval_expr(w, row, &ctx)?;
2691                if !matches!(cond, Value::Bool(true)) {
2692                    continue;
2693                }
2694            }
2695            let mut new_vals = row.values.clone();
2696            for (pos, expr) in &targets {
2697                let v = eval::eval_expr(expr, row, &ctx)?;
2698                new_vals[*pos] =
2699                    coerce_value(v, schema_cols[*pos].ty, &schema_cols[*pos].name, *pos)?;
2700            }
2701            planned.push((i, new_vals));
2702        }
2703        // v7.6.6 — capture pre-update row values for the FK
2704        // enforcement passes below. `planned` carries new values
2705        // only; pair them with the old row.
2706        let plan_with_old: Vec<(usize, Vec<Value>, Vec<Value>)> = planned
2707            .iter()
2708            .map(|(pos, new_vals)| (*pos, table.rows()[*pos].values.clone(), new_vals.clone()))
2709            .collect();
2710        let self_fks = table.schema().foreign_keys.clone();
2711        // v7.12.5 — `affected` is computed post-BEFORE-trigger
2712        // below (triggers may RETURN NULL to skip individual
2713        // rows). The pre-trigger len shape is no longer accurate.
2714        // Release mutable borrow on `table` for the FK passes.
2715        let _ = table;
2716        // v7.6.6 — Stage 2a: outbound FK check. For every row whose
2717        // local FK columns changed, the new value must exist in the
2718        // parent.
2719        if !self_fks.is_empty() {
2720            let new_rows: Vec<Vec<Value>> = planned
2721                .iter()
2722                .map(|(_pos, new_vals)| new_vals.clone())
2723                .collect();
2724            enforce_fk_inserts(self.active_catalog(), &stmt.table, &self_fks, &new_rows)?;
2725        }
2726        // v7.13.0 — CHECK constraint enforcement on UPDATE
2727        // (mailrs round-5 G3). Predicates evaluated against the
2728        // candidate post-UPDATE row; false rejects the UPDATE.
2729        {
2730            let new_rows: Vec<Vec<Value>> = planned
2731                .iter()
2732                .map(|(_pos, new_vals)| new_vals.clone())
2733                .collect();
2734            enforce_check_constraints(self.active_catalog(), &stmt.table, &new_rows)?;
2735        }
2736        // v7.6.6 — Stage 2b: inbound FK check. For every row that
2737        // changed value in a column that *some other table* uses as
2738        // a FK parent column, react per `on_update` action.
2739        let child_plan =
2740            plan_fk_parent_updates(self.active_catalog(), &stmt.table, &plan_with_old)?;
2741        // Stage 3a — apply each child-side action.
2742        for step in &child_plan {
2743            apply_fk_child_step(self.active_catalog_mut(), step)?;
2744        }
2745        // Stage 3b — apply the original UPDATE.
2746        let table = self
2747            .active_catalog_mut()
2748            .get_mut(&stmt.table)
2749            .ok_or_else(|| {
2750                EngineError::Storage(StorageError::TableNotFound {
2751                    name: stmt.table.clone(),
2752                })
2753            })?;
2754        // v7.12.5 — fire BEFORE/AFTER UPDATE row-level triggers
2755        // around the apply loop. BEFORE sees NEW=candidate +
2756        // OLD=current; may rewrite NEW or RETURN NULL to skip.
2757        // AFTER sees NEW=post-write + OLD=pre-write (both read-
2758        // only).
2759        //
2760        // Filter `planned` through the BEFORE pass first so the
2761        // RETURNING snapshot reflects what actually got written
2762        // (triggers may rewrite cells, including a cancellation).
2763        let mut applied_after_before: Vec<(usize, Row, Row)> = Vec::with_capacity(planned.len());
2764        // v7.12.7 — embedded SQL queue.
2765        let mut deferred_embedded: Vec<triggers::DeferredEmbeddedStmt> = Vec::new();
2766        for (pos, new_vals) in &planned {
2767            let old_row = table.rows()[*pos].clone();
2768            let mut new_row = Row::new(new_vals.clone());
2769            let mut skip = false;
2770            for (fd, filter) in &before_update_triggers {
2771                // v7.13.0 — `UPDATE OF cols` filter (mailrs round-5
2772                // G7). Skip this trigger when the filter is set and
2773                // no listed column actually differs between OLD and
2774                // NEW for this row.
2775                if !filter.is_empty()
2776                    && !any_column_changed(filter, &schema_cols, &old_row, &new_row)
2777                {
2778                    continue;
2779                }
2780                let (outcome, deferred) = triggers::fire_row_trigger(
2781                    fd,
2782                    Some(new_row.clone()),
2783                    Some(&old_row),
2784                    &stmt.table,
2785                    &schema_cols,
2786                    &[],
2787                    trigger_session_cfg.as_deref(),
2788                    false,
2789                )
2790                .map_err(|e| EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}"))))?;
2791                deferred_embedded.extend(deferred);
2792                match outcome {
2793                    triggers::TriggerOutcome::Row(r) => new_row = r,
2794                    triggers::TriggerOutcome::Skip => {
2795                        skip = true;
2796                        break;
2797                    }
2798                }
2799            }
2800            if !skip {
2801                applied_after_before.push((*pos, new_row, old_row));
2802            }
2803        }
2804        // v7.9.4 — snapshot post-update values for RETURNING (post-
2805        // BEFORE-trigger because triggers can rewrite cells).
2806        let updated_for_returning: Vec<Vec<Value>> = if stmt.returning.is_some() {
2807            applied_after_before
2808                .iter()
2809                .map(|(_pos, new_row, _old)| new_row.values.clone())
2810                .collect()
2811        } else {
2812            Vec::new()
2813        };
2814        let affected = applied_after_before.len();
2815        // Apply, then fire AFTER triggers per row. AFTER runs read-
2816        // only against the freshly-written row; v7.12.4-shape
2817        // assignment errors with a clear message.
2818        for (pos, new_row, old_row) in applied_after_before {
2819            table.update_row(pos, new_row.values.clone())?;
2820            for (fd, filter) in &after_update_triggers {
2821                if !filter.is_empty()
2822                    && !any_column_changed(filter, &schema_cols, &old_row, &new_row)
2823                {
2824                    continue;
2825                }
2826                let (_outcome, deferred) = triggers::fire_row_trigger(
2827                    fd,
2828                    Some(new_row.clone()),
2829                    Some(&old_row),
2830                    &stmt.table,
2831                    &schema_cols,
2832                    &[],
2833                    trigger_session_cfg.as_deref(),
2834                    true,
2835                )
2836                .map_err(|e| EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}"))))?;
2837                deferred_embedded.extend(deferred);
2838            }
2839        }
2840        let _ = table;
2841        // v7.12.7 — drain trigger-emitted embedded SQL for this UPDATE.
2842        self.execute_deferred_trigger_stmts(deferred_embedded, cancel)?;
2843        // v6.2.1 — auto-analyze modified-row tracking for UPDATE.
2844        if !self.in_transaction() && affected > 0 {
2845            self.statistics
2846                .record_modifications(&stmt.table, affected as u64);
2847        }
2848        // v7.9.4 — RETURNING projection.
2849        if let Some(items) = &stmt.returning {
2850            return self.build_returning_rows(&stmt.table, items, updated_for_returning);
2851        }
2852        Ok(QueryResult::CommandOk {
2853            affected,
2854            modified_catalog: !self.in_transaction(),
2855        })
2856    }
2857
2858    /// v4.4 `DELETE FROM <table> [WHERE cond]`. Collects matching
2859    /// positions then delegates to `Table::delete_rows` (single index
2860    /// rebuild for the batch).
2861    fn exec_delete_cancel(
2862        &mut self,
2863        stmt: &spg_sql::ast::DeleteStatement,
2864        cancel: CancelToken<'_>,
2865    ) -> Result<QueryResult, EngineError> {
2866        // v7.12.5 — snapshot BEFORE/AFTER DELETE row triggers + the
2867        // session FTS config before the mut borrow (same shape as
2868        // INSERT / UPDATE).
2869        let before_delete_triggers = self.snapshot_row_triggers(&stmt.table, "DELETE", "BEFORE");
2870        let after_delete_triggers = self.snapshot_row_triggers(&stmt.table, "DELETE", "AFTER");
2871        let trigger_session_cfg: Option<String> = self
2872            .session_params
2873            .get("default_text_search_config")
2874            .cloned();
2875        // v5.2.3: PK-targeted DELETE → first retire any cold-tier
2876        // locator for the key. The cold row body stays in the
2877        // segment (becoming shadowed garbage that a future
2878        // compaction pass reclaims) but the index no longer
2879        // resolves it. The shadow count contributes to the
2880        // affected total; the subsequent hot walk handles any hot
2881        // rows for the same key.
2882        let mut cold_shadow_count: usize = 0;
2883        if let Some(w) = &stmt.where_ {
2884            let schema_cols = self
2885                .active_catalog()
2886                .get(&stmt.table)
2887                .ok_or_else(|| {
2888                    EngineError::Storage(StorageError::TableNotFound {
2889                        name: stmt.table.clone(),
2890                    })
2891                })?
2892                .schema()
2893                .columns
2894                .clone();
2895            if let Some((col_pos, key)) = try_pk_predicate(w, &schema_cols, stmt.table.as_str())
2896                && let Some(idx_name) = self
2897                    .active_catalog()
2898                    .get(&stmt.table)
2899                    .and_then(|t| t.index_on(col_pos).map(|i| i.name.clone()))
2900            {
2901                cold_shadow_count = self
2902                    .active_catalog_mut()
2903                    .shadow_cold_row(&stmt.table, &idx_name, &key)
2904                    .unwrap_or(0);
2905            }
2906        }
2907
2908        // v7.12.1 — cache the session FTS config as an owned
2909        // String before the mutable table borrow below; the
2910        // ctx-builder then references it via `as_deref` so the
2911        // immutable read of `session_params` doesn't conflict
2912        // with the mut borrow chain.
2913        let ts_cfg: Option<String> = self
2914            .session_param("default_text_search_config")
2915            .map(String::from);
2916        let table = self
2917            .active_catalog_mut()
2918            .get_mut(&stmt.table)
2919            .ok_or_else(|| {
2920                EngineError::Storage(StorageError::TableNotFound {
2921                    name: stmt.table.clone(),
2922                })
2923            })?;
2924        let schema_cols: Vec<ColumnSchema> = table.schema().columns.clone();
2925        let ctx = EvalContext::new(&schema_cols, Some(stmt.table.as_str()))
2926            .with_default_text_search_config(ts_cfg.as_deref());
2927        let mut positions: Vec<usize> = Vec::new();
2928        // v7.6.3 — collect every to-delete row's full Value tuple
2929        // alongside its position, so the FK enforcement pass can
2930        // run after the mut borrow drops.
2931        let mut to_delete_rows: Vec<Vec<Value>> = Vec::new();
2932        for (i, row) in table.rows().iter().enumerate() {
2933            if i.is_multiple_of(256) {
2934                cancel.check()?;
2935            }
2936            let keep = if let Some(w) = &stmt.where_ {
2937                let cond = eval::eval_expr(w, row, &ctx)?;
2938                !matches!(cond, Value::Bool(true))
2939            } else {
2940                false
2941            };
2942            if !keep {
2943                positions.push(i);
2944                to_delete_rows.push(row.values.clone());
2945            }
2946        }
2947        // v7.6.3 / v7.6.4 — Stage 2: FK enforcement on the immutable
2948        // catalog. Release the mut borrow and run reverse-scan
2949        // against every child table whose FK targets this table.
2950        // RESTRICT / NoAction raise an error; CASCADE returns a
2951        // cascade plan that stage 3 applies after the primary delete.
2952        // SET NULL / SET DEFAULT remain Unsupported until v7.6.5.
2953        let _ = table;
2954        // v7.12.5 — BEFORE DELETE row-level triggers. Each fires
2955        // with NEW=None / OLD=pre-delete row; RETURN OLD (or NEW)
2956        // = proceed, RETURN NULL = skip the row entirely. The
2957        // filter must run BEFORE the FK cascade plan so cascaded
2958        // child rows track the trigger's skip-decision on the
2959        // parent.
2960        // v7.12.7 — embedded SQL queue.
2961        let mut deferred_embedded: Vec<triggers::DeferredEmbeddedStmt> = Vec::new();
2962        if !before_delete_triggers.is_empty() {
2963            let mut filtered_positions: Vec<usize> = Vec::with_capacity(positions.len());
2964            let mut filtered_old_rows: Vec<Vec<Value>> = Vec::with_capacity(to_delete_rows.len());
2965            for (pos, old_vals) in positions.iter().zip(to_delete_rows.iter()) {
2966                let old_row = Row::new(old_vals.clone());
2967                let mut cancel_this = false;
2968                for fd in &before_delete_triggers {
2969                    let (outcome, deferred) = triggers::fire_row_trigger(
2970                        fd,
2971                        None,
2972                        Some(&old_row),
2973                        &stmt.table,
2974                        &schema_cols,
2975                        &[],
2976                        trigger_session_cfg.as_deref(),
2977                        false,
2978                    )
2979                    .map_err(|e| {
2980                        EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}")))
2981                    })?;
2982                    deferred_embedded.extend(deferred);
2983                    if matches!(outcome, triggers::TriggerOutcome::Skip) {
2984                        cancel_this = true;
2985                        break;
2986                    }
2987                }
2988                if !cancel_this {
2989                    filtered_positions.push(*pos);
2990                    filtered_old_rows.push(old_vals.clone());
2991                }
2992            }
2993            positions = filtered_positions;
2994            to_delete_rows = filtered_old_rows;
2995        }
2996        let cascade_plan = plan_fk_parent_deletions(
2997            self.active_catalog(),
2998            &stmt.table,
2999            &positions,
3000            &to_delete_rows,
3001        )?;
3002        // Stage 3a — apply each FK child step (SET NULL / SET
3003        // DEFAULT / CASCADE delete) before deleting the parent.
3004        // The plan is already ordered: nulls/defaults first, then
3005        // cascade deletes (so a row mutated and later deleted
3006        // surfaces as deleted — though v7.6.5 doesn't produce
3007        // that overlap today).
3008        for step in &cascade_plan {
3009            apply_fk_child_step(self.active_catalog_mut(), step)?;
3010        }
3011        // Stage 3b — actually delete the original target rows.
3012        let table = self
3013            .active_catalog_mut()
3014            .get_mut(&stmt.table)
3015            .ok_or_else(|| {
3016                EngineError::Storage(StorageError::TableNotFound {
3017                    name: stmt.table.clone(),
3018                })
3019            })?;
3020        let affected = table.delete_rows(&positions) + cold_shadow_count;
3021        let _ = table;
3022        // v7.12.5 — AFTER DELETE row-level triggers fire post-write
3023        // with NEW=None / OLD=pre-delete row (each from the
3024        // already-snapshotted to_delete_rows). Return value is
3025        // ignored (matches PG AFTER semantics).
3026        if !after_delete_triggers.is_empty() {
3027            for old_vals in &to_delete_rows {
3028                let old_row = Row::new(old_vals.clone());
3029                for fd in &after_delete_triggers {
3030                    let (_outcome, deferred) = triggers::fire_row_trigger(
3031                        fd,
3032                        None,
3033                        Some(&old_row),
3034                        &stmt.table,
3035                        &schema_cols,
3036                        &[],
3037                        trigger_session_cfg.as_deref(),
3038                        true,
3039                    )
3040                    .map_err(|e| {
3041                        EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}")))
3042                    })?;
3043                    deferred_embedded.extend(deferred);
3044                }
3045            }
3046        }
3047        // v7.12.7 — drain trigger-emitted embedded SQL for this DELETE.
3048        self.execute_deferred_trigger_stmts(deferred_embedded, cancel)?;
3049        // v6.2.1 — auto-analyze modified-row tracking for DELETE.
3050        if !self.in_transaction() && affected > 0 {
3051            self.statistics
3052                .record_modifications(&stmt.table, affected as u64);
3053        }
3054        // v7.9.4 — RETURNING projection over the soon-to-be-gone
3055        // rows. `to_delete_rows` was snapshotted in stage 1 before
3056        // mutation, so the projection sees the pre-delete state
3057        // (matches PG semantics: DELETE RETURNING returns the row
3058        // as it was just before removal).
3059        if let Some(items) = &stmt.returning {
3060            return self.build_returning_rows(&stmt.table, items, to_delete_rows);
3061        }
3062        Ok(QueryResult::CommandOk {
3063            affected,
3064            modified_catalog: !self.in_transaction(),
3065        })
3066    }
3067
3068    /// `SHOW TABLES` — one row per table in the active catalog.
3069    /// Column name is `name` so result-set consumers can downstream
3070    /// `SELECT name FROM ...` style logic if needed.
3071    /// v4.26: `EXPLAIN [ANALYZE] <select>`. Returns a single-column
3072    /// `QUERY PLAN` text table — first line names the top operator
3073    /// (Scan / Aggregate / Window / etc.), indented children list
3074    /// FROM joins, WHERE filters, ORDER BY / LIMIT, projection
3075    /// shape, and any active index hits. `ANALYZE` execs the inner
3076    /// SELECT and appends actual-row + elapsed-micros annotations.
3077    #[allow(clippy::format_push_string)]
3078    fn exec_explain(
3079        &self,
3080        e: &spg_sql::ast::ExplainStatement,
3081        cancel: CancelToken<'_>,
3082    ) -> Result<QueryResult, EngineError> {
3083        let mut lines = Vec::<String>::new();
3084        explain_select(&e.inner, self, 0, &mut lines);
3085        if e.suggest {
3086            // v6.8.3 — index advisor. Walks the SELECT's FROM
3087            // tables + WHERE column refs; for each (table, column)
3088            // pair that lacks an index, append a SUGGEST line with
3089            // a copy-pastable `CREATE INDEX` statement. This is a
3090            // pure-syntax heuristic — no cardinality estimation —
3091            // matching the v6.8.3 design intent of "tell the
3092            // operator where indexes are missing", not "give the
3093            // mathematically optimal index set".
3094            let suggestions = build_index_suggestions(&e.inner, self);
3095            for s in suggestions {
3096                lines.push(s);
3097            }
3098        } else if e.analyze {
3099            // v6.2.4 — EXPLAIN ANALYZE annotates each operator line
3100            // with `(rows=N)` where the row count is computable
3101            // without re-executing the full query:
3102            //   - Top-level operator (first non-indented line):
3103            //     rows = final result.len()
3104            //   - "From: <table> [full scan]" lines: rows =
3105            //     table.rows().len() (catalog read; no execution)
3106            //   - "From: <table> [index seek]": indeterminate —
3107            //     the index step would need re-execution; v6.2.5
3108            //     adds per-operator wall-clock + hot/cold rows
3109            //     instrumentation that makes this concrete.
3110            //   - Everything else: marked `(—)` so the surface
3111            //     stays well-defined without silently dropping
3112            //     stats. v6.2.5 fills in via inline executor
3113            //     instrumentation.
3114            // Total elapsed lands on a trailing `Total: …` line.
3115            let started = self.clock.map(|f| f());
3116            let exec = self.exec_select_cancel(&e.inner, cancel)?;
3117            let elapsed_micros = match (self.clock, started) {
3118                (Some(f), Some(s)) => Some(f().saturating_sub(s)),
3119                _ => None,
3120            };
3121            let row_count = if let QueryResult::Rows { rows, .. } = &exec {
3122                rows.len()
3123            } else {
3124                0
3125            };
3126            annotate_explain_lines(&mut lines, row_count, self);
3127            let mut total = alloc::format!("Total: rows={row_count}");
3128            if let Some(us) = elapsed_micros {
3129                total.push_str(&alloc::format!(" elapsed={us}us"));
3130            }
3131            lines.push(total);
3132        }
3133        let columns = alloc::vec![ColumnSchema::new("QUERY PLAN", DataType::Text, false)];
3134        let rows: Vec<Row> = lines
3135            .into_iter()
3136            .map(|l| Row::new(alloc::vec![Value::Text(l)]))
3137            .collect();
3138        Ok(QueryResult::Rows { columns, rows })
3139    }
3140
3141    fn exec_show_tables(&self) -> QueryResult {
3142        let columns = alloc::vec![ColumnSchema::new("name", DataType::Text, false)];
3143        let rows: Vec<Row> = self
3144            .active_catalog()
3145            .table_names()
3146            .into_iter()
3147            .map(|n| Row::new(alloc::vec![Value::Text(n)]))
3148            .collect();
3149        QueryResult::Rows { columns, rows }
3150    }
3151
3152    /// `SHOW COLUMNS FROM <table>` — one row per column with the
3153    /// declared name, SQL type rendering, and nullability flag.
3154    fn exec_show_columns(&self, table_name: &str) -> Result<QueryResult, EngineError> {
3155        let table =
3156            self.active_catalog()
3157                .get(table_name)
3158                .ok_or_else(|| StorageError::TableNotFound {
3159                    name: table_name.into(),
3160                })?;
3161        let columns = alloc::vec![
3162            ColumnSchema::new("name", DataType::Text, false),
3163            ColumnSchema::new("type", DataType::Text, false),
3164            ColumnSchema::new("nullable", DataType::Bool, false),
3165        ];
3166        let rows: Vec<Row> = table
3167            .schema()
3168            .columns
3169            .iter()
3170            .map(|c| {
3171                Row::new(alloc::vec![
3172                    Value::Text(c.name.clone()),
3173                    Value::Text(alloc::format!("{}", c.ty)),
3174                    Value::Bool(c.nullable),
3175                ])
3176            })
3177            .collect();
3178        Ok(QueryResult::Rows { columns, rows })
3179    }
3180
3181    fn exec_begin(&mut self) -> Result<QueryResult, EngineError> {
3182        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
3183        if self.tx_catalogs.contains_key(&tx_id) {
3184            return Err(EngineError::TransactionAlreadyOpen);
3185        }
3186        self.tx_catalogs.insert(
3187            tx_id,
3188            TxState {
3189                catalog: self.catalog.clone(),
3190                savepoints: Vec::new(),
3191            },
3192        );
3193        Ok(QueryResult::CommandOk {
3194            affected: 0,
3195            modified_catalog: false,
3196        })
3197    }
3198
3199    fn exec_commit(&mut self) -> Result<QueryResult, EngineError> {
3200        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
3201        let state = self
3202            .tx_catalogs
3203            .remove(&tx_id)
3204            .ok_or(EngineError::NoActiveTransaction)?;
3205        self.catalog = state.catalog;
3206        // All savepoints become permanent at COMMIT and the stack
3207        // resets for the next TX (`state.savepoints` is discarded with
3208        // `state`).
3209        Ok(QueryResult::CommandOk {
3210            affected: 0,
3211            modified_catalog: true,
3212        })
3213    }
3214
3215    fn exec_rollback(&mut self) -> Result<QueryResult, EngineError> {
3216        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
3217        if self.tx_catalogs.remove(&tx_id).is_none() {
3218            return Err(EngineError::NoActiveTransaction);
3219        }
3220        // savepoints discarded with the TxState
3221        Ok(QueryResult::CommandOk {
3222            affected: 0,
3223            modified_catalog: false,
3224        })
3225    }
3226
3227    fn exec_savepoint(&mut self, name: String) -> Result<QueryResult, EngineError> {
3228        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
3229        let state = self
3230            .tx_catalogs
3231            .get_mut(&tx_id)
3232            .ok_or(EngineError::NoActiveTransaction)?;
3233        // PG re-uses an existing savepoint name by dropping the older
3234        // entry and pushing a fresh one — match that behaviour so
3235        // application code can `SAVEPOINT sp; ...; SAVEPOINT sp` freely.
3236        state.savepoints.retain(|(n, _)| n != &name);
3237        let snapshot = state.catalog.clone();
3238        state.savepoints.push((name, snapshot));
3239        Ok(QueryResult::CommandOk {
3240            affected: 0,
3241            modified_catalog: false,
3242        })
3243    }
3244
3245    fn exec_rollback_to_savepoint(&mut self, name: &str) -> Result<QueryResult, EngineError> {
3246        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
3247        let state = self
3248            .tx_catalogs
3249            .get_mut(&tx_id)
3250            .ok_or(EngineError::NoActiveTransaction)?;
3251        let pos = state
3252            .savepoints
3253            .iter()
3254            .rposition(|(n, _)| n == name)
3255            .ok_or_else(|| {
3256                EngineError::Unsupported(alloc::format!("savepoint not found: {name}"))
3257            })?;
3258        // The savepoint stays on the stack (PG semantics): a later
3259        // `RELEASE` or further `ROLLBACK TO` is still allowed. Everything
3260        // after it is discarded.
3261        let snapshot = state.savepoints[pos].1.clone();
3262        state.savepoints.truncate(pos + 1);
3263        state.catalog = snapshot;
3264        Ok(QueryResult::CommandOk {
3265            affected: 0,
3266            modified_catalog: false,
3267        })
3268    }
3269
3270    fn exec_release_savepoint(&mut self, name: &str) -> Result<QueryResult, EngineError> {
3271        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
3272        let state = self
3273            .tx_catalogs
3274            .get_mut(&tx_id)
3275            .ok_or(EngineError::NoActiveTransaction)?;
3276        let pos = state
3277            .savepoints
3278            .iter()
3279            .rposition(|(n, _)| n == name)
3280            .ok_or_else(|| {
3281                EngineError::Unsupported(alloc::format!("savepoint not found: {name}"))
3282            })?;
3283        // RELEASE keeps the work since the savepoint, just discards the
3284        // bookmark plus everything nested under it.
3285        state.savepoints.truncate(pos);
3286        Ok(QueryResult::CommandOk {
3287            affected: 0,
3288            modified_catalog: false,
3289        })
3290    }
3291
3292    /// v6.0.4 — synchronous `ALTER INDEX <name> REBUILD [WITH
3293    /// (encoding = …)]`. Walks every table in the active catalog
3294    /// looking for an index matching `stmt.name`, then delegates the
3295    /// rebuild (including any encoding switch) to
3296    /// `Table::rebuild_nsw_index`. The "live" non-blocking
3297    /// optimisation is v6.0.4.1 / v6.1.x territory.
3298    /// v6.7.2 — `ALTER TABLE t SET hot_tier_bytes = X`. Dispatch
3299    /// arm. Currently the only setting is `hot_tier_bytes`; later
3300    /// v6.7.x can extend `AlterTableTarget` without touching this
3301    /// arm structure.
3302    fn exec_alter_table(
3303        &mut self,
3304        s: spg_sql::ast::AlterTableStatement,
3305    ) -> Result<QueryResult, EngineError> {
3306        // v7.13.2 — mailrs round-6 S1: apply each subaction in order.
3307        // On first error the statement aborts; subactions already
3308        // applied stay (no transactional rollback in v7.13 — wrap in
3309        // BEGIN/COMMIT if atomicity matters).
3310        let table_name = s.name.clone();
3311        for target in s.targets {
3312            self.exec_alter_table_subaction(&table_name, target)?;
3313        }
3314        Ok(QueryResult::CommandOk {
3315            affected: 0,
3316            modified_catalog: !self.in_transaction(),
3317        })
3318    }
3319
3320    fn exec_alter_table_subaction(
3321        &mut self,
3322        table_name_outer: &str,
3323        target: spg_sql::ast::AlterTableTarget,
3324    ) -> Result<(), EngineError> {
3325        // Inner helper retains the s.name closure shape; alias to `s`
3326        // for minimal diff against the v7.13.0 body.
3327        struct S<'a> {
3328            name: &'a str,
3329        }
3330        let s = S {
3331            name: table_name_outer,
3332        };
3333        match target {
3334            spg_sql::ast::AlterTableTarget::SetHotTierBytes(n) => {
3335                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
3336                    EngineError::Storage(StorageError::TableNotFound {
3337                        name: s.name.into(),
3338                    })
3339                })?;
3340                table.schema_mut().hot_tier_bytes = Some(n);
3341            }
3342            spg_sql::ast::AlterTableTarget::AddForeignKey(fk) => {
3343                // v7.6.8 — resolve FK against the live catalog first
3344                // (validates parent table, columns, indices). Then
3345                // verify every existing row in the child table
3346                // satisfies the new constraint. Then install it.
3347                let cols_snapshot = self
3348                    .active_catalog()
3349                    .get(s.name)
3350                    .ok_or_else(|| {
3351                        EngineError::Storage(StorageError::TableNotFound {
3352                            name: s.name.into(),
3353                        })
3354                    })?
3355                    .schema()
3356                    .columns
3357                    .clone();
3358                let storage_fk =
3359                    resolve_foreign_key(s.name, &cols_snapshot, fk, self.active_catalog())?;
3360                // Verify existing rows. Treat them as a virtual
3361                // INSERT batch — reusing the v7.6.2 enforce helper.
3362                let existing_rows: Vec<Vec<Value>> = self
3363                    .active_catalog()
3364                    .get(&s.name)
3365                    .expect("checked above")
3366                    .rows()
3367                    .iter()
3368                    .map(|r| r.values.clone())
3369                    .collect();
3370                enforce_fk_inserts(
3371                    self.active_catalog(),
3372                    s.name,
3373                    core::slice::from_ref(&storage_fk),
3374                    &existing_rows,
3375                )?;
3376                // Reject duplicate constraint name.
3377                let table = self
3378                    .active_catalog_mut()
3379                    .get_mut(s.name)
3380                    .expect("checked above");
3381                if let Some(name) = &storage_fk.name
3382                    && table
3383                        .schema()
3384                        .foreign_keys
3385                        .iter()
3386                        .any(|f| f.name.as_ref() == Some(name))
3387                {
3388                    return Err(EngineError::Unsupported(alloc::format!(
3389                        "ALTER TABLE ADD CONSTRAINT: a constraint named {name:?} already exists"
3390                    )));
3391                }
3392                table.schema_mut().foreign_keys.push(storage_fk);
3393            }
3394            spg_sql::ast::AlterTableTarget::DropForeignKey { name, if_exists } => {
3395                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
3396                    EngineError::Storage(StorageError::TableNotFound {
3397                        name: s.name.into(),
3398                    })
3399                })?;
3400                let fks = &mut table.schema_mut().foreign_keys;
3401                let before = fks.len();
3402                fks.retain(|f| f.name.as_ref() != Some(&name));
3403                if fks.len() == before && !if_exists {
3404                    return Err(EngineError::Unsupported(alloc::format!(
3405                        "ALTER TABLE DROP CONSTRAINT: no FK named {name:?} on {:?}",
3406                        s.name
3407                    )));
3408                }
3409                // v7.13.2 mailrs round-6 S7: IF EXISTS silences the miss.
3410            }
3411            spg_sql::ast::AlterTableTarget::AddColumn {
3412                column,
3413                if_not_exists,
3414            } => {
3415                // v7.13.0 — mailrs round-5 G1. Append-only column add
3416                // with back-fill of the DEFAULT (or NULL) into every
3417                // existing row. Column positions don't shift, so we
3418                // skip index rebuild.
3419                let clock = self.clock;
3420                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
3421                    EngineError::Storage(StorageError::TableNotFound {
3422                        name: s.name.into(),
3423                    })
3424                })?;
3425                if table
3426                    .schema()
3427                    .columns
3428                    .iter()
3429                    .any(|c| c.name.eq_ignore_ascii_case(&column.name))
3430                {
3431                    if if_not_exists {
3432                        return Ok(());
3433                    }
3434                    return Err(EngineError::Unsupported(alloc::format!(
3435                        "ALTER TABLE ADD COLUMN: column {:?} already exists on {:?}",
3436                        column.name,
3437                        s.name
3438                    )));
3439                }
3440                let col_name = column.name.clone();
3441                let nullable = column.nullable;
3442                let has_default =
3443                    column.default.is_some() || column.auto_increment;
3444                let col_schema = column_def_to_schema(column)?;
3445                let row_count = table.row_count();
3446                // Compute the back-fill value. Literal / runtime DEFAULT
3447                // funnels through the same resolver that INSERT uses
3448                // (v7.9.21 `resolve_column_default_free`). NULL when
3449                // the column is nullable and has no DEFAULT. NOT NULL
3450                // without DEFAULT errors when the table has existing
3451                // rows — same as PG.
3452                let fill_value: Value = if has_default
3453                    || col_schema.runtime_default.is_some()
3454                {
3455                    resolve_column_default_free(&col_schema, clock)?
3456                } else if nullable || row_count == 0 {
3457                    Value::Null
3458                } else {
3459                    return Err(EngineError::Unsupported(alloc::format!(
3460                        "ALTER TABLE ADD COLUMN {col_name:?}: NOT NULL column requires DEFAULT \
3461                         when the table has existing rows"
3462                    )));
3463                };
3464                table.add_column(col_schema, fill_value);
3465            }
3466            spg_sql::ast::AlterTableTarget::AlterColumnType {
3467                column,
3468                new_type,
3469                using,
3470            } => {
3471                // v7.13.0 — mailrs round-5 G8. Re-evaluate each
3472                // row's column value (either through the USING
3473                // expression if supplied, or as a direct CAST of
3474                // the existing value) and re-coerce to the new
3475                // type. Indices on the column get rebuilt.
3476                let new_data_type = column_type_to_data_type(new_type);
3477                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
3478                    EngineError::Storage(StorageError::TableNotFound {
3479                        name: s.name.into(),
3480                    })
3481                })?;
3482                let col_pos = table
3483                    .schema()
3484                    .columns
3485                    .iter()
3486                    .position(|c| c.name.eq_ignore_ascii_case(&column))
3487                    .ok_or_else(|| {
3488                        EngineError::Unsupported(alloc::format!(
3489                            "ALTER COLUMN TYPE: column {column:?} not found on {:?}",
3490                            s.name
3491                        ))
3492                    })?;
3493                let schema_cols = table.schema().columns.clone();
3494                let ctx = eval::EvalContext::new(&schema_cols, None);
3495                let mut new_values: alloc::vec::Vec<Value> =
3496                    alloc::vec::Vec::with_capacity(table.row_count());
3497                for row in table.rows().iter() {
3498                    let raw = match &using {
3499                        Some(expr) => eval::eval_expr(expr, row, &ctx).map_err(|e| {
3500                            EngineError::Unsupported(alloc::format!(
3501                                "ALTER COLUMN TYPE: USING expression failed: {e:?}"
3502                            ))
3503                        })?,
3504                        None => row.values.get(col_pos).cloned().unwrap_or(Value::Null),
3505                    };
3506                    let coerced = coerce_value(raw, new_data_type, &column, col_pos)?;
3507                    new_values.push(coerced);
3508                }
3509                table.schema_mut().columns[col_pos].ty = new_data_type;
3510                for (i, v) in new_values.into_iter().enumerate() {
3511                    let mut row_values = table
3512                        .rows()
3513                        .get(i)
3514                        .expect("bounds-checked above")
3515                        .values
3516                        .clone();
3517                    row_values[col_pos] = v;
3518                    table.update_row(i, row_values)?;
3519                }
3520            }
3521            spg_sql::ast::AlterTableTarget::DropColumn {
3522                column,
3523                if_exists,
3524                cascade,
3525            } => {
3526                // v7.13.3 — mailrs round-7 S8. Remove the column +
3527                // every row's value at that position; drop any index
3528                // on the column. RESTRICT (default) rejects when an
3529                // FK on this table or partial-index predicate
3530                // references the column; CASCADE removes those
3531                // dependents first.
3532                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
3533                    EngineError::Storage(StorageError::TableNotFound {
3534                        name: s.name.into(),
3535                    })
3536                })?;
3537                let col_pos = match table
3538                    .schema()
3539                    .columns
3540                    .iter()
3541                    .position(|c| c.name.eq_ignore_ascii_case(&column))
3542                {
3543                    Some(p) => p,
3544                    None => {
3545                        if if_exists {
3546                            return Ok(());
3547                        }
3548                        return Err(EngineError::Unsupported(alloc::format!(
3549                            "ALTER TABLE DROP COLUMN: column {column:?} not found on {:?}",
3550                            s.name
3551                        )));
3552                    }
3553                };
3554                // Dependent check: FKs whose local columns include
3555                // col_pos. CASCADE drops them; otherwise reject.
3556                let dependent_fks: Vec<usize> = table
3557                    .schema()
3558                    .foreign_keys
3559                    .iter()
3560                    .enumerate()
3561                    .filter_map(|(i, fk)| {
3562                        if fk.local_columns.contains(&col_pos) {
3563                            Some(i)
3564                        } else {
3565                            None
3566                        }
3567                    })
3568                    .collect();
3569                if !dependent_fks.is_empty() && !cascade {
3570                    return Err(EngineError::Unsupported(alloc::format!(
3571                        "ALTER TABLE DROP COLUMN {column:?}: column has FK dependents; \
3572                         use DROP COLUMN ... CASCADE to remove them"
3573                    )));
3574                }
3575                // CASCADE the FK removals first.
3576                if cascade {
3577                    // Drop in reverse so indices stay valid.
3578                    let mut sorted = dependent_fks.clone();
3579                    sorted.sort();
3580                    sorted.reverse();
3581                    let fks = &mut table.schema_mut().foreign_keys;
3582                    for i in sorted {
3583                        fks.remove(i);
3584                    }
3585                }
3586                // Drop the column. New helper on Table does the
3587                // row + schema + index shift atomically.
3588                table.drop_column(col_pos);
3589            }
3590        }
3591        Ok(())
3592    }
3593
3594    fn exec_alter_index(
3595        &mut self,
3596        stmt: spg_sql::ast::AlterIndexStatement,
3597    ) -> Result<QueryResult, EngineError> {
3598        // Translate the optional SQL-side encoding choice into the
3599        // storage-side enum; the same SqlVecEncoding -> VecEncoding
3600        // bridge `column_type_to_data_type` uses.
3601        let spg_sql::ast::AlterIndexStatement {
3602            name: idx_name,
3603            target,
3604        } = stmt;
3605        let spg_sql::ast::AlterIndexTarget::Rebuild { encoding } = target;
3606        let target = encoding.map(|e| match e {
3607            SqlVecEncoding::F32 => VecEncoding::F32,
3608            SqlVecEncoding::Sq8 => VecEncoding::Sq8,
3609            SqlVecEncoding::F16 => VecEncoding::F16,
3610        });
3611        // Linear scan: index names are globally unique within a
3612        // catalog (enforced by add_nsw_index_inner) so the first
3613        // match is the only one. Save the table name to avoid
3614        // borrowing while we then take a mut borrow.
3615        let table_name = {
3616            let cat = self.active_catalog();
3617            let mut found: Option<String> = None;
3618            for tname in cat.table_names() {
3619                if let Some(t) = cat.get(&tname)
3620                    && t.indices().iter().any(|i| i.name == idx_name)
3621                {
3622                    found = Some(tname);
3623                    break;
3624                }
3625            }
3626            found.ok_or_else(|| {
3627                EngineError::Storage(StorageError::IndexNotFound {
3628                    name: idx_name.clone(),
3629                })
3630            })?
3631        };
3632        let table = self
3633            .active_catalog_mut()
3634            .get_mut(&table_name)
3635            .expect("table found above");
3636        table.rebuild_nsw_index(&idx_name, target)?;
3637        // v6.3.1 — ALTER INDEX REBUILD potentially with new encoding
3638        // changes cost characteristics; evict any cached plans.
3639        self.plan_cache.evict_referencing(&table_name);
3640        Ok(QueryResult::CommandOk {
3641            affected: 0,
3642            modified_catalog: !self.in_transaction(),
3643        })
3644    }
3645
3646    fn exec_create_index(
3647        &mut self,
3648        stmt: CreateIndexStatement,
3649    ) -> Result<QueryResult, EngineError> {
3650        let table = self
3651            .active_catalog_mut()
3652            .get_mut(&stmt.table)
3653            .ok_or_else(|| {
3654                EngineError::Storage(StorageError::TableNotFound {
3655                    name: stmt.table.clone(),
3656                })
3657            })?;
3658        // `IF NOT EXISTS` reduces DuplicateIndex to a no-op CommandOk.
3659        if stmt.if_not_exists && table.indices().iter().any(|i| i.name == stmt.name) {
3660            return Ok(QueryResult::CommandOk {
3661                affected: 0,
3662                modified_catalog: false,
3663            });
3664        }
3665        // v7.9.14 — multi-column index parses through; engine
3666        // builds a single-column BTree on the leading column only.
3667        // The extras live on the AST so spg-server's dispatcher
3668        // can emit a PG-wire NoticeResponse / log line. Composite
3669        // BTree keys land in v7.10.
3670        let _ = &stmt.extra_columns; // intentional drop on engine side
3671        let table_name = stmt.table.clone();
3672        // v6.8.0 — resolve INCLUDE column names to positions. Done
3673        // before `add_index` so a typo error surfaces before any
3674        // catalog mutation lands.
3675        let included_positions: Vec<usize> = if stmt.included_columns.is_empty() {
3676            Vec::new()
3677        } else {
3678            let schema = table.schema();
3679            stmt.included_columns
3680                .iter()
3681                .map(|c| {
3682                    schema.column_position(c).ok_or_else(|| {
3683                        EngineError::Storage(StorageError::ColumnNotFound { column: c.clone() })
3684                    })
3685                })
3686                .collect::<Result<Vec<_>, _>>()?
3687        };
3688        match stmt.method {
3689            IndexMethod::BTree => table.add_index(stmt.name.clone(), &stmt.column)?,
3690            IndexMethod::Hnsw => {
3691                if !included_positions.is_empty() {
3692                    return Err(EngineError::Unsupported(
3693                        "INCLUDE columns are not supported on HNSW indexes".into(),
3694                    ));
3695                }
3696                table.add_nsw_index(stmt.name.clone(), &stmt.column, spg_storage::NSW_DEFAULT_M)?;
3697            }
3698            // v6.7.1 — BRIN. Pure metadata; no in-memory data.
3699            IndexMethod::Brin => {
3700                if !included_positions.is_empty() {
3701                    return Err(EngineError::Unsupported(
3702                        "INCLUDE columns are not supported on BRIN indexes".into(),
3703                    ));
3704                }
3705                table.add_brin_index(stmt.name.clone(), &stmt.column)?;
3706            }
3707            // v7.12.3 — GIN inverted index. Real posting-list-backed
3708            // GIN when the indexed column is `tsvector`; falls back
3709            // to a BTree on the leading column for any other column
3710            // type so v7.9.26b's `pg_dump` compatibility (GIN on
3711            // JSONB etc. silently loading as BTree) is preserved.
3712            // Operators see the real GIN only where it matters; old
3713            // schemas keep loading.
3714            IndexMethod::Gin => {
3715                if !included_positions.is_empty() {
3716                    return Err(EngineError::Unsupported(
3717                        "INCLUDE columns are not supported on GIN indexes".into(),
3718                    ));
3719                }
3720                let col_pos = table
3721                    .schema()
3722                    .column_position(&stmt.column)
3723                    .ok_or_else(|| {
3724                        EngineError::Storage(StorageError::ColumnNotFound {
3725                            column: stmt.column.clone(),
3726                        })
3727                    })?;
3728                if table.schema().columns[col_pos].ty == spg_storage::DataType::TsVector {
3729                    table
3730                        .add_gin_index(stmt.name.clone(), &stmt.column)
3731                        .map_err(EngineError::Storage)?;
3732                } else {
3733                    // v7.9.26b BTree fallback — the catalog still
3734                    // gets an index entry on the leading column so
3735                    // pg_dump scripts that name GIN on JSONB / etc.
3736                    // load clean; query-time gain stays opt-in for
3737                    // tsvector callers.
3738                    table.add_index(stmt.name.clone(), &stmt.column)?;
3739                }
3740            }
3741        }
3742        if !included_positions.is_empty()
3743            && let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name)
3744        {
3745            idx.included_columns = included_positions;
3746        }
3747        // v6.8.1 — persist partial-index predicate. Stored as the
3748        // expression's Display form so the catalog snapshot stays
3749        // pure (storage has no spg-sql dependency). The runtime
3750        // maintenance path treats partial indexes identically to
3751        // full indexes for v6.8.1 (over-maintenance is safe; the
3752        // planner-side "use partial when query WHERE implies the
3753        // predicate" pass is STABILITY carve-out).
3754        if let Some(pred_expr) = &stmt.partial_predicate {
3755            let canonical = pred_expr.to_string();
3756            // v7.13.2 — mailrs round-6 S2. PG's `pg_trgm` uses
3757            // `CREATE INDEX … USING gin(col gin_trgm_ops) WHERE …`
3758            // routinely to slim trigram indexes. SPG now persists
3759            // the predicate for GIN / BRIN / HNSW the same way it
3760            // already does for BTree — same v6.8.1 "over-maintain
3761            // is safe; planner-side partial routing is STABILITY
3762            // carve-out" semantics. HNSW carries an additional
3763            // caveat: the predicate isn't applied at index build
3764            // time (would require per-row eval inside the NSW
3765            // construction loop), so the index oversamples; query
3766            // time the WHERE clause still filters correctly.
3767            if let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name) {
3768                idx.partial_predicate = Some(canonical);
3769            }
3770        }
3771        // v6.8.2 — persist expression index key. Same Display-form
3772        // storage; the runtime maintenance pass evaluates each
3773        // row's expression to derive the index key, but for v6.8.2
3774        // the engine falls through to the bare-column-reference
3775        // path and the expression is preserved for format-layer
3776        // round-trip + future planner work. Carved-out in
3777        // STABILITY § "Out of v6.8".
3778        if let Some(key_expr) = &stmt.expression {
3779            if matches!(
3780                stmt.method,
3781                IndexMethod::Hnsw | IndexMethod::Brin | IndexMethod::Gin
3782            ) {
3783                return Err(EngineError::Unsupported(
3784                    "Expression keys are not supported on HNSW or BRIN indexes".into(),
3785                ));
3786            }
3787            let canonical = key_expr.to_string();
3788            if let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name) {
3789                idx.expression = Some(canonical);
3790            }
3791        }
3792        // v7.9.29 — persist `is_unique` flag on the storage Index.
3793        // Combined with `partial_predicate`, INSERT enforcement
3794        // checks that no other row whose predicate evaluates true
3795        // shares the same indexed key. Parser already rejected
3796        // `UNIQUE` on HNSW / BRIN, so plain BTree here.
3797        // For multi-column UNIQUE INDEX the extras matter (the
3798        // full tuple is the uniqueness key), so resolve them to
3799        // column positions and persist on the index too.
3800        if stmt.is_unique {
3801            let mut extra_positions: alloc::vec::Vec<usize> = alloc::vec::Vec::new();
3802            for col_name in &stmt.extra_columns {
3803                let pos = table
3804                    .schema()
3805                    .columns
3806                    .iter()
3807                    .position(|c| c.name.eq_ignore_ascii_case(col_name))
3808                    .ok_or_else(|| {
3809                        EngineError::Unsupported(alloc::format!(
3810                            "UNIQUE INDEX {:?}: extra column {col_name:?} not in table {:?}",
3811                            stmt.name,
3812                            stmt.table
3813                        ))
3814                    })?;
3815                extra_positions.push(pos);
3816            }
3817            if let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name) {
3818                idx.is_unique = true;
3819                idx.extra_column_positions = extra_positions;
3820            }
3821            // At index-creation time, check the existing rows for
3822            // pre-existing duplicates that would have violated the
3823            // new constraint — otherwise CREATE UNIQUE INDEX would
3824            // silently leave duplicates in place.
3825            let snapshot_indices = table.indices().to_vec();
3826            let snapshot_rows: alloc::vec::Vec<spg_storage::Row> =
3827                table.rows().iter().cloned().collect();
3828            let snapshot_schema = table.schema().clone();
3829            let idx_ref = snapshot_indices
3830                .iter()
3831                .find(|i| i.name == stmt.name)
3832                .expect("just-added index");
3833            check_existing_unique_violation(idx_ref, &snapshot_schema, &snapshot_rows)?;
3834        }
3835        // v6.3.1 — adding an index can change the optimal plan for
3836        // any cached query that references this table.
3837        self.plan_cache.evict_referencing(&table_name);
3838        Ok(QueryResult::CommandOk {
3839            affected: 0,
3840            modified_catalog: !self.in_transaction(),
3841        })
3842    }
3843
3844    /// v7.13.3 — mailrs round-7 S9. SPG-specific reconciliation
3845    /// for `CREATE TABLE IF NOT EXISTS` when the table already
3846    /// exists. Adds missing columns + inline FKs from the new
3847    /// definition; existing columns / constraints stay untouched.
3848    /// New columns with a `NOT NULL` declaration without a
3849    /// `DEFAULT` are reported as a clear error rather than
3850    /// silently dropped — this is the "fail loud on real
3851    /// incompatibility, fail silent on schema-superset" tradeoff.
3852    fn reconcile_table_if_not_exists(
3853        &mut self,
3854        stmt: CreateTableStatement,
3855    ) -> Result<QueryResult, EngineError> {
3856        let table_name = stmt.name.clone();
3857        let clock = self.clock;
3858        let existing_col_names: alloc::collections::BTreeSet<String> = self
3859            .active_catalog()
3860            .get(&table_name)
3861            .expect("checked above")
3862            .schema()
3863            .columns
3864            .iter()
3865            .map(|c| c.name.to_ascii_lowercase())
3866            .collect();
3867        let row_count = self
3868            .active_catalog()
3869            .get(&table_name)
3870            .expect("checked above")
3871            .row_count();
3872        // Collect missing column defs in source order.
3873        let new_columns: alloc::vec::Vec<spg_sql::ast::ColumnDef> = stmt
3874            .columns
3875            .iter()
3876            .filter(|c| !existing_col_names.contains(&c.name.to_ascii_lowercase()))
3877            .cloned()
3878            .collect();
3879        for col_def in new_columns {
3880            let col_name = col_def.name.clone();
3881            let nullable = col_def.nullable;
3882            let has_default = col_def.default.is_some() || col_def.auto_increment;
3883            let col_schema = column_def_to_schema(col_def)?;
3884            let fill_value: Value = if has_default || col_schema.runtime_default.is_some() {
3885                resolve_column_default_free(&col_schema, clock)?
3886            } else if nullable || row_count == 0 {
3887                Value::Null
3888            } else {
3889                return Err(EngineError::Unsupported(alloc::format!(
3890                    "CREATE TABLE IF NOT EXISTS {table_name:?}: reconciling \
3891                     column {col_name:?} requires DEFAULT (existing rows would violate NOT NULL)"
3892                )));
3893            };
3894            let table = self
3895                .active_catalog_mut()
3896                .get_mut(&table_name)
3897                .expect("checked above");
3898            table.add_column(col_schema, fill_value);
3899        }
3900        // Resolve any newly-added inline FKs (column-level
3901        // REFERENCES forms) and install. Skip FKs whose local
3902        // columns we didn't have in the existing table.
3903        let table_cols_now = self
3904            .active_catalog()
3905            .get(&table_name)
3906            .expect("checked above")
3907            .schema()
3908            .columns
3909            .clone();
3910        for fk in stmt.foreign_keys {
3911            // Only install FKs whose every local column resolves
3912            // — older catalogs may have a column the new FK
3913            // references but not the column the new FK declares.
3914            let all_resolved = fk
3915                .columns
3916                .iter()
3917                .all(|c| table_cols_now.iter().any(|sc| sc.name.eq_ignore_ascii_case(c)));
3918            if !all_resolved {
3919                continue;
3920            }
3921            let already_present = {
3922                let table = self
3923                    .active_catalog()
3924                    .get(&table_name)
3925                    .expect("checked above");
3926                table.schema().foreign_keys.iter().any(|f| {
3927                    f.parent_table.eq_ignore_ascii_case(&fk.parent_table)
3928                        && f.local_columns.len() == fk.columns.len()
3929                })
3930            };
3931            if already_present {
3932                continue;
3933            }
3934            let storage_fk =
3935                resolve_foreign_key(&table_name, &table_cols_now, fk, self.active_catalog())?;
3936            let table = self
3937                .active_catalog_mut()
3938                .get_mut(&table_name)
3939                .expect("checked above");
3940            table.schema_mut().foreign_keys.push(storage_fk);
3941        }
3942        Ok(QueryResult::CommandOk {
3943            affected: 0,
3944            modified_catalog: !self.in_transaction(),
3945        })
3946    }
3947
3948    fn exec_create_table(
3949        &mut self,
3950        stmt: CreateTableStatement,
3951    ) -> Result<QueryResult, EngineError> {
3952        if stmt.if_not_exists && self.active_catalog().get(&stmt.name).is_some() {
3953            // v7.13.3 — mailrs round-7 S9 reconciliation. PG's
3954            // semantics for `CREATE TABLE IF NOT EXISTS` is a
3955            // silent no-op when the table exists, even if the new
3956            // definition adds columns or constraints. SPG extends
3957            // this: any column in the new definition that's
3958            // missing from the existing table is added (with
3959            // DEFAULT back-fill / NULL); inline FKs likewise.
3960            // Existing columns are NOT modified. This makes
3961            // mailrs's schema layering (init-schema's `contacts`
3962            // sender-tracking table + migrate-023's CardDAV
3963            // `contacts` extension) converge correctly without
3964            // mailrs-side edits. PG users who want PG-strict
3965            // silent-no-op behaviour can use SPG's `--strict-pg`
3966            // flag (deferred to v7.14).
3967            return self.reconcile_table_if_not_exists(stmt);
3968        }
3969        let table_name = stmt.name.clone();
3970        // v7.9.13 — pluck the names of any columns marked
3971        // `PRIMARY KEY` inline so the post-create-table pass can
3972        // build an implicit BTree index. mailrs F1.
3973        let inline_pk_columns: Vec<String> = stmt
3974            .columns
3975            .iter()
3976            .filter(|c| c.is_primary_key)
3977            .map(|c| c.name.clone())
3978            .collect();
3979        // v7.9.19 — table-level constraints: PRIMARY KEY (a, b, ...)
3980        // and UNIQUE (a, b, ...). Each builds a BTree index on the
3981        // leading column (the existing single-column storage tier)
3982        // and registers a UniquenessConstraint on the schema for
3983        // INSERT-time enforcement of the full tuple. mailrs G1/G6.
3984        let cols = stmt
3985            .columns
3986            .into_iter()
3987            .map(column_def_to_schema)
3988            .collect::<Result<Vec<_>, _>>()?;
3989        // Composite NOT-NULL implication for PRIMARY KEY columns.
3990        let mut cols = cols;
3991        for tc in &stmt.table_constraints {
3992            if let spg_sql::ast::TableConstraint::PrimaryKey { columns, .. } = tc {
3993                for col_name in columns {
3994                    if let Some(col) = cols.iter_mut().find(|c| c.name == *col_name) {
3995                        col.nullable = false;
3996                    }
3997                }
3998            }
3999        }
4000        // v7.6.1 — resolve every FK in the statement against the
4001        // already-known catalog. Validates: parent table exists,
4002        // parent column names exist, arity matches, parent columns
4003        // have a PK / UNIQUE index. Self-referencing FKs (parent
4004        // table == this table) resolve against the column list we
4005        // just built — they don't need the catalog yet.
4006        let mut fks: Vec<spg_storage::ForeignKeyConstraint> =
4007            Vec::with_capacity(stmt.foreign_keys.len());
4008        for fk in stmt.foreign_keys {
4009            fks.push(resolve_foreign_key(
4010                &table_name,
4011                &cols,
4012                fk,
4013                self.active_catalog(),
4014            )?);
4015        }
4016        let mut schema = TableSchema::new(table_name.clone(), cols);
4017        schema.foreign_keys = fks;
4018        // v7.9.19 — translate AST table_constraints to storage
4019        // UniquenessConstraints (column name → position) so the
4020        // INSERT enforcement helper sees positions directly.
4021        let mut uc_storage: Vec<spg_storage::UniquenessConstraint> = Vec::new();
4022        let mut check_exprs: Vec<String> = Vec::new();
4023        for tc in &stmt.table_constraints {
4024            let (is_pk, names, nnd) = match tc {
4025                spg_sql::ast::TableConstraint::PrimaryKey { columns, .. } => {
4026                    (true, columns.clone(), false)
4027                }
4028                spg_sql::ast::TableConstraint::Unique {
4029                    columns,
4030                    nulls_not_distinct,
4031                    ..
4032                } => (false, columns.clone(), *nulls_not_distinct),
4033                spg_sql::ast::TableConstraint::Check { expr, .. } => {
4034                    // v7.13.0 — collect CHECK predicate sources;
4035                    // they get attached to the schema below.
4036                    check_exprs.push(alloc::format!("{expr}"));
4037                    continue;
4038                }
4039            };
4040            let mut positions = Vec::with_capacity(names.len());
4041            for n in &names {
4042                let pos = schema
4043                    .columns
4044                    .iter()
4045                    .position(|c| c.name == *n)
4046                    .ok_or_else(|| {
4047                        EngineError::Unsupported(alloc::format!(
4048                            "table constraint references unknown column {n:?}"
4049                        ))
4050                    })?;
4051                positions.push(pos);
4052            }
4053            uc_storage.push(spg_storage::UniquenessConstraint {
4054                is_primary_key: is_pk,
4055                columns: positions,
4056                nulls_not_distinct: nnd,
4057            });
4058        }
4059        schema.uniqueness_constraints = uc_storage.clone();
4060        schema.checks = check_exprs;
4061        self.active_catalog_mut().create_table(schema)?;
4062        // v7.9.13 — implicit BTree per inline PK column +
4063        // v7.9.19 — implicit BTree on the leading column of every
4064        // table-level PRIMARY KEY / UNIQUE constraint.
4065        let table = self
4066            .active_catalog_mut()
4067            .get_mut(&table_name)
4068            .expect("just created");
4069        for (i, col_name) in inline_pk_columns.iter().enumerate() {
4070            let idx_name = if inline_pk_columns.len() == 1 {
4071                alloc::format!("{table_name}_pkey")
4072            } else {
4073                alloc::format!("{table_name}_pkey_{i}")
4074            };
4075            if let Err(e) = table.add_index(idx_name, col_name) {
4076                return Err(EngineError::Storage(e));
4077            }
4078        }
4079        for (i, tc) in stmt.table_constraints.iter().enumerate() {
4080            let (is_pk, names) = match tc {
4081                spg_sql::ast::TableConstraint::PrimaryKey { columns, .. } => (true, columns),
4082                spg_sql::ast::TableConstraint::Unique { columns, .. } => (false, columns),
4083                spg_sql::ast::TableConstraint::Check { .. } => continue,
4084            };
4085            let leading = &names[0];
4086            // Skip if a same-column BTree already exists (e.g.
4087            // inline PK on the leading column).
4088            let already = table.indices().iter().any(|idx| {
4089                matches!(idx.kind, spg_storage::IndexKind::BTree(_))
4090                    && table.schema().columns[idx.column_position].name == *leading
4091            });
4092            if already {
4093                continue;
4094            }
4095            let suffix = if is_pk { "pkey" } else { "key" };
4096            let idx_name = if names.len() == 1 {
4097                alloc::format!("{table_name}_{leading}_{suffix}")
4098            } else {
4099                alloc::format!("{table_name}_{leading}_{suffix}_{i}")
4100            };
4101            if let Err(e) = table.add_index(idx_name, leading) {
4102                return Err(EngineError::Storage(e));
4103            }
4104        }
4105        Ok(QueryResult::CommandOk {
4106            affected: 0,
4107            modified_catalog: !self.in_transaction(),
4108        })
4109    }
4110
4111    fn exec_insert(&mut self, stmt: InsertStatement) -> Result<QueryResult, EngineError> {
4112        // v7.13.0 — `INSERT INTO t [(cols)] SELECT …` (mailrs
4113        // round-5 G4). Execute the inner SELECT first, then route
4114        // back through the regular VALUES code path with the
4115        // materialised rows.
4116        if let Some(select) = stmt.select_source.clone() {
4117            let select_result = self.exec_select_cancel(&select, CancelToken::none())?;
4118            let rows = match select_result {
4119                QueryResult::Rows { rows, .. } => rows,
4120                other => {
4121                    return Err(EngineError::Unsupported(alloc::format!(
4122                        "INSERT … SELECT: inner statement produced {other:?} instead of a row set"
4123                    )));
4124                }
4125            };
4126            let mut materialised: Vec<Vec<Expr>> = Vec::with_capacity(rows.len());
4127            for row in rows {
4128                let mut tuple: Vec<Expr> = Vec::with_capacity(row.values.len());
4129                for v in row.values {
4130                    tuple.push(value_to_literal_expr_permissive(v)?);
4131                }
4132                materialised.push(tuple);
4133            }
4134            let recurse = InsertStatement {
4135                table: stmt.table,
4136                columns: stmt.columns,
4137                rows: materialised,
4138                select_source: None,
4139                on_conflict: stmt.on_conflict,
4140                returning: stmt.returning,
4141            };
4142            return self.exec_insert(recurse);
4143        }
4144        // v7.9.21 — snapshot the clock fn pointer before the mut
4145        // borrow on the catalog opens; runtime DEFAULT eval needs
4146        // it inside the row hot loop.
4147        let clock = self.clock;
4148        // v7.12.4 — snapshot row-level triggers + their referenced
4149        // functions before the mut borrow on the catalog opens.
4150        // Cloned out so the row hot loop can fire them without
4151        // re-borrowing the catalog (which would conflict with
4152        // table.insert's mutable borrow).
4153        let before_insert_triggers = self.snapshot_row_triggers(&stmt.table, "INSERT", "BEFORE");
4154        let after_insert_triggers = self.snapshot_row_triggers(&stmt.table, "INSERT", "AFTER");
4155        let trigger_session_cfg: Option<alloc::string::String> = self
4156            .session_params
4157            .get("default_text_search_config")
4158            .cloned();
4159        let table = self
4160            .active_catalog_mut()
4161            .get_mut(&stmt.table)
4162            .ok_or_else(|| {
4163                EngineError::Storage(StorageError::TableNotFound {
4164                    name: stmt.table.clone(),
4165                })
4166            })?;
4167        // v3.1.5: clone the columns vector only (not the whole
4168        // TableSchema — saves one String alloc for the table name).
4169        // We need an owned snapshot because we'll call `table.insert`
4170        // (mutable borrow on `table`) inside the row loop while
4171        // reading schema fields.
4172        let column_meta: Vec<ColumnSchema> = table.schema().columns.clone();
4173        let schema_cols_len = column_meta.len();
4174        // Build a permutation `tuple_pos[c] = Some(j)` meaning schema
4175        // column `c` is filled from the `j`-th tuple slot; `None` means
4176        // "fill with NULL". Validated once and reused for every row.
4177        let tuple_pos: Option<Vec<Option<usize>>> = match &stmt.columns {
4178            None => None, // 1-1 mapping, fast path
4179            Some(cols) => {
4180                let mut map = alloc::vec![None; schema_cols_len];
4181                for (j, name) in cols.iter().enumerate() {
4182                    let idx = column_meta
4183                        .iter()
4184                        .position(|c| c.name == *name)
4185                        .ok_or_else(|| {
4186                            EngineError::Eval(EvalError::ColumnNotFound { name: name.clone() })
4187                        })?;
4188                    if map[idx].is_some() {
4189                        return Err(EngineError::Storage(StorageError::ArityMismatch {
4190                            expected: schema_cols_len,
4191                            actual: cols.len(),
4192                        }));
4193                    }
4194                    map[idx] = Some(j);
4195                }
4196                // Omitted columns must either be nullable, carry a
4197                // DEFAULT, or be AUTO_INCREMENT. Catch NOT NULL
4198                // omissions up front so the WAL stays clean.
4199                for (i, col) in column_meta.iter().enumerate() {
4200                    if map[i].is_none()
4201                        && !col.nullable
4202                        && col.default.is_none()
4203                        && col.runtime_default.is_none()
4204                        && !col.auto_increment
4205                    {
4206                        return Err(EngineError::Storage(StorageError::NullInNotNull {
4207                            column: col.name.clone(),
4208                        }));
4209                    }
4210                }
4211                Some(map)
4212            }
4213        };
4214        let expected_tuple_len = stmt.columns.as_ref().map_or(schema_cols_len, Vec::len);
4215        // v7.6.2 — snapshot this table's FK list before the
4216        // mutable-borrow window so we can run parent lookups
4217        // against the immutable catalog after parsing. Empty vec is
4218        // the no-FK fast path; clone cost is O(fks * arity) which
4219        // is < 100 ns for typical schemas.
4220        let fks = table.schema().foreign_keys.clone();
4221        let mut affected = 0usize;
4222        // Stage 1 — parse + AUTO_INC + coerce all rows under the
4223        // single mutable borrow.
4224        let mut all_values: Vec<Vec<Value>> = Vec::with_capacity(stmt.rows.len());
4225        for tuple in stmt.rows {
4226            if tuple.len() != expected_tuple_len {
4227                return Err(EngineError::Storage(StorageError::ArityMismatch {
4228                    expected: expected_tuple_len,
4229                    actual: tuple.len(),
4230                }));
4231            }
4232            // Fast path: no column-list permutation → tuple slot j
4233            // maps to schema column j. We can zip schema with tuple
4234            // and skip the `raw_tuple` staging allocation entirely.
4235            let values: Vec<Value> = if let Some(map) = &tuple_pos {
4236                // Permuted path: still need raw_tuple to index by `map[i]`.
4237                let raw_tuple: Vec<Value> = tuple
4238                    .into_iter()
4239                    .map(literal_expr_to_value)
4240                    .collect::<Result<_, _>>()?;
4241                let mut out = Vec::with_capacity(schema_cols_len);
4242                for (i, col) in column_meta.iter().enumerate() {
4243                    let mut raw = match map[i] {
4244                        Some(j) => raw_tuple[j].clone(),
4245                        None => resolve_column_default_free(col, clock)?,
4246                    };
4247                    if col.auto_increment && raw.is_null() {
4248                        let next = table.next_auto_value(i).ok_or_else(|| {
4249                            EngineError::Unsupported(alloc::format!(
4250                                "AUTO_INCREMENT applies to integer columns only (column `{}`)",
4251                                col.name
4252                            ))
4253                        })?;
4254                        raw = Value::BigInt(next);
4255                    }
4256                    out.push(coerce_value(raw, col.ty, &col.name, i)?);
4257                }
4258                out
4259            } else {
4260                // 1-1 mapping fast path: single Vec alloc, no raw_tuple.
4261                let mut out = Vec::with_capacity(schema_cols_len);
4262                for (i, (col, expr)) in column_meta.iter().zip(tuple).enumerate() {
4263                    let mut raw = literal_expr_to_value(expr)?;
4264                    if col.auto_increment && raw.is_null() {
4265                        let next = table.next_auto_value(i).ok_or_else(|| {
4266                            EngineError::Unsupported(alloc::format!(
4267                                "AUTO_INCREMENT applies to integer columns only (column `{}`)",
4268                                col.name
4269                            ))
4270                        })?;
4271                        raw = Value::BigInt(next);
4272                    }
4273                    out.push(coerce_value(raw, col.ty, &col.name, i)?);
4274                }
4275                out
4276            };
4277            all_values.push(values);
4278        }
4279        // Stage 2 — FK enforcement on the immutable catalog.
4280        // Non-lexical lifetimes release the mutable borrow on
4281        // `table` here since stage 1 was the last use. The
4282        // parent-table lookup runs before any row is committed.
4283        let uniqueness = table.schema().uniqueness_constraints.clone();
4284        let _ = table;
4285        if !fks.is_empty() {
4286            enforce_fk_inserts(self.active_catalog(), &stmt.table, &fks, &all_values)?;
4287        }
4288        // v7.13.0 — CHECK constraint enforcement (mailrs round-5 G3).
4289        enforce_check_constraints(self.active_catalog(), &stmt.table, &all_values)?;
4290        // v7.9.19 — composite UNIQUE / PRIMARY KEY enforcement.
4291        enforce_uniqueness_inserts(self.active_catalog(), &stmt.table, &uniqueness, &all_values)?;
4292        // v7.9.29 — CREATE UNIQUE INDEX [WHERE pred] enforcement.
4293        // Independent of table-level UniquenessConstraint (which
4294        // can't carry a predicate). Walks the table's indexes;
4295        // for each `is_unique` index, only rows whose
4296        // partial_predicate evaluates truthy are checked for
4297        // collision. mailrs K1.
4298        enforce_unique_index_inserts(self.active_catalog(), &stmt.table, &all_values)?;
4299        // v7.9.8 / v7.9.9 — ON CONFLICT handling.
4300        //   - `DO NOTHING` filters `all_values` to non-conflicting
4301        //     rows + drops within-batch duplicates.
4302        //   - `DO UPDATE SET …` ALSO filters, but for each
4303        //     conflicting row it queues an UPDATE on the existing
4304        //     row using the incoming row's values as `EXCLUDED.*`.
4305        let mut pending_updates: Vec<(usize, Vec<Value>)> = Vec::new();
4306        let mut skipped_count = 0usize;
4307        if let Some(clause) = &stmt.on_conflict {
4308            let conflict_cols = resolve_on_conflict_columns(
4309                self.active_catalog(),
4310                &stmt.table,
4311                clause.target_columns.as_slice(),
4312            )?;
4313            let mut kept: Vec<Vec<Value>> = Vec::with_capacity(all_values.len());
4314            let mut seen_keys: Vec<Vec<Value>> = Vec::new();
4315            for values in all_values {
4316                let key_tuple: Vec<&Value> = conflict_cols.iter().map(|&c| &values[c]).collect();
4317                // SQL spec: NULL in any conflict column means "no
4318                // conflict possible" (NULL ≠ NULL for uniqueness).
4319                let has_null_key = key_tuple.iter().any(|v| matches!(v, Value::Null));
4320                let collides_with_table = !has_null_key
4321                    && on_conflict_keys_exist(
4322                        self.active_catalog(),
4323                        &stmt.table,
4324                        &conflict_cols,
4325                        &key_tuple,
4326                    );
4327                let key_tuple_owned: Vec<Value> = key_tuple.iter().map(|v| (*v).clone()).collect();
4328                let collides_with_batch =
4329                    !has_null_key && seen_keys.iter().any(|k| k == &key_tuple_owned);
4330                let collides = collides_with_table || collides_with_batch;
4331                match (&clause.action, collides) {
4332                    (_, false) => {
4333                        seen_keys.push(key_tuple_owned);
4334                        kept.push(values);
4335                    }
4336                    (spg_sql::ast::OnConflictAction::Nothing, true) => {
4337                        skipped_count += 1;
4338                    }
4339                    (
4340                        spg_sql::ast::OnConflictAction::Update {
4341                            assignments,
4342                            where_,
4343                        },
4344                        true,
4345                    ) => {
4346                        if !collides_with_table {
4347                            skipped_count += 1;
4348                            continue;
4349                        }
4350                        let target_pos = lookup_row_position_by_keys(
4351                            self.active_catalog(),
4352                            &stmt.table,
4353                            &conflict_cols,
4354                            &key_tuple,
4355                        )
4356                        .ok_or_else(|| {
4357                            EngineError::Unsupported(
4358                                "ON CONFLICT DO UPDATE: conflict detected but row \
4359                                 position could not be resolved (cold-tier row?)"
4360                                    .into(),
4361                            )
4362                        })?;
4363                        let updated = apply_on_conflict_assignments(
4364                            self.active_catalog(),
4365                            &stmt.table,
4366                            target_pos,
4367                            &values,
4368                            assignments,
4369                            where_.as_ref(),
4370                        )?;
4371                        if let Some(new_row) = updated {
4372                            pending_updates.push((target_pos, new_row));
4373                        } else {
4374                            skipped_count += 1;
4375                        }
4376                    }
4377                }
4378            }
4379            all_values = kept;
4380        }
4381        // Stage 3 — insert all rows under a fresh mutable borrow.
4382        let table = self
4383            .active_catalog_mut()
4384            .get_mut(&stmt.table)
4385            .ok_or_else(|| {
4386                EngineError::Storage(StorageError::TableNotFound {
4387                    name: stmt.table.clone(),
4388                })
4389            })?;
4390        // v7.9.4 — keep RETURNING projection rows separate per
4391        // INSERT and per UPDATE branch so DO UPDATE pushes the new
4392        // post-update state, not the incoming-only values.
4393        let mut returning_rows: Vec<Vec<Value>> = Vec::new();
4394        // v7.12.7 — collect embedded SQL emitted by any trigger
4395        // fire across the row loop; engine drains the queue after
4396        // the table mut borrow drops.
4397        let mut deferred_embedded: Vec<triggers::DeferredEmbeddedStmt> = Vec::new();
4398        'rowloop: for values in all_values {
4399            let mut row = Row::new(values);
4400            // v7.12.4 — BEFORE INSERT row-level triggers. Each
4401            // trigger may rewrite NEW cells (e.g. populate
4402            // `search_vector := to_tsvector(...)`) and may return
4403            // NULL to skip the row entirely.
4404            for fd in &before_insert_triggers {
4405                let (outcome, deferred) = triggers::fire_row_trigger(
4406                    fd,
4407                    Some(row.clone()),
4408                    None,
4409                    &stmt.table,
4410                    &column_meta,
4411                    &[],
4412                    trigger_session_cfg.as_deref(),
4413                    false,
4414                )
4415                .map_err(|e| EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}"))))?;
4416                deferred_embedded.extend(deferred);
4417                match outcome {
4418                    triggers::TriggerOutcome::Row(r) => row = r,
4419                    triggers::TriggerOutcome::Skip => continue 'rowloop,
4420                }
4421            }
4422            if stmt.returning.is_some() {
4423                returning_rows.push(row.values.clone());
4424            }
4425            // v7.12.4 — clone for the AFTER trigger view; insert
4426            // moves the row into the table.
4427            let inserted = row.clone();
4428            table.insert(row)?;
4429            affected += 1;
4430            // v7.12.4 — AFTER INSERT row-level triggers fire post-
4431            // write. Return value is ignored (PG semantics); we
4432            // surface any error from the body up to the caller.
4433            for fd in &after_insert_triggers {
4434                let (_outcome, deferred) = triggers::fire_row_trigger(
4435                    fd,
4436                    Some(inserted.clone()),
4437                    None,
4438                    &stmt.table,
4439                    &column_meta,
4440                    &[],
4441                    trigger_session_cfg.as_deref(),
4442                    true,
4443                )
4444                .map_err(|e| EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}"))))?;
4445                deferred_embedded.extend(deferred);
4446            }
4447        }
4448        // v7.9.9 — apply ON CONFLICT DO UPDATE rewrites collected
4449        // in the conflict-resolution pass. update_row handles
4450        // index maintenance + body re-encoding.
4451        for (pos, new_row) in pending_updates {
4452            if stmt.returning.is_some() {
4453                returning_rows.push(new_row.clone());
4454            }
4455            table.update_row(pos, new_row)?;
4456            affected += 1;
4457        }
4458        let _ = skipped_count;
4459        // v7.12.7 — drop the table mut borrow and drain any
4460        // trigger-emitted embedded SQL queued during this INSERT.
4461        // The borrow has to release first because each deferred
4462        // stmt may UPDATE / INSERT / DELETE the same (or another)
4463        // table — including, in principle, this one.
4464        let _ = table;
4465        self.execute_deferred_trigger_stmts(deferred_embedded, CancelToken::none())?;
4466        // v7.9.4/v7.9.9 — RETURNING streams the rows that ended
4467        // up in the table after this statement (insert or
4468        // post-update on conflict).
4469        if let Some(items) = &stmt.returning {
4470            return self.build_returning_rows(&stmt.table, items, returning_rows);
4471        }
4472        // v6.2.1 — auto-analyze: track per-table modified-row
4473        // counter so the background sweep can decide when to
4474        // re-ANALYZE. Cheap path on the autocommit-wrap hot loop
4475        // — one BTreeMap entry update per INSERT batch.
4476        if !self.in_transaction() && affected > 0 {
4477            self.statistics
4478                .record_modifications(&stmt.table, affected as u64);
4479        }
4480        Ok(QueryResult::CommandOk {
4481            affected,
4482            modified_catalog: !self.in_transaction(),
4483        })
4484    }
4485
4486    /// v4.5: SELECT with cooperative cancellation. The token is
4487    /// honoured between UNION peers and inside the bare-SELECT row
4488    /// loop; HNSW kNN graph walks and the aggregate executor don't
4489    /// honour it yet (deferred — those paths bound their work
4490    /// internally by `LIMIT k` and `GROUP BY` cardinality).
4491    /// v6.10.2 — cold-tier time-travel scan. Resolves the segment
4492    /// by id, decodes each row body against the table's current
4493    /// schema, applies the SELECT's projection + optional WHERE +
4494    /// optional LIMIT, returns a `Rows` result. JOINs / aggregates
4495    /// / ORDER BY are unsupported on this path (STABILITY carve-
4496    /// out); operators wanting them should restore the segment
4497    /// into a regular table first.
4498    fn exec_select_as_of_segment(
4499        &self,
4500        stmt: &SelectStatement,
4501        from: &spg_sql::ast::FromClause,
4502        segment_id: u32,
4503    ) -> Result<QueryResult, EngineError> {
4504        // v6.10.2 scope: no joins, no aggregates, no ORDER BY,
4505        // no GROUP BY / HAVING / UNION / OFFSET / DISTINCT.
4506        if !from.joins.is_empty()
4507            || stmt.group_by.is_some()
4508            || stmt.having.is_some()
4509            || !stmt.unions.is_empty()
4510            || !stmt.order_by.is_empty()
4511            || stmt.offset.is_some()
4512            || stmt.distinct
4513            || aggregate::uses_aggregate(stmt)
4514        {
4515            return Err(EngineError::Unsupported(
4516                "AS OF SEGMENT supports SELECT projection + WHERE + LIMIT only \
4517                 (joins / aggregates / ORDER BY are STABILITY § \"Out of v6.10\")"
4518                    .into(),
4519            ));
4520        }
4521        let table = self
4522            .active_catalog()
4523            .get(&from.primary.name)
4524            .ok_or_else(|| StorageError::TableNotFound {
4525                name: from.primary.name.clone(),
4526            })?;
4527        let schema = table.schema().clone();
4528        let schema_cols = &schema.columns;
4529        let alias = from
4530            .primary
4531            .alias
4532            .as_deref()
4533            .unwrap_or(from.primary.name.as_str());
4534        let ctx = EvalContext::new(schema_cols, Some(alias));
4535        let seg = self
4536            .active_catalog()
4537            .cold_segment(segment_id)
4538            .ok_or_else(|| {
4539                EngineError::Unsupported(alloc::format!(
4540                    "AS OF SEGMENT: cold segment {segment_id} not registered"
4541                ))
4542            })?;
4543        let mut out_rows: Vec<Row> = Vec::new();
4544        let mut limit_remaining: Option<usize> =
4545            stmt.limit_literal().and_then(|n| usize::try_from(n).ok());
4546        for (_key, body) in seg.scan() {
4547            let (row, _consumed) =
4548                spg_storage::decode_row_body_dense(&body, &schema).map_err(EngineError::Storage)?;
4549            if let Some(where_expr) = &stmt.where_ {
4550                let cond = self.eval_expr_simple(where_expr, &row, &ctx)?;
4551                if !matches!(cond, Value::Bool(true)) {
4552                    continue;
4553                }
4554            }
4555            // Projection.
4556            let projected = self.project_row_simple(&row, &stmt.items, schema_cols, alias)?;
4557            out_rows.push(projected);
4558            if let Some(rem) = limit_remaining.as_mut() {
4559                if *rem == 0 {
4560                    out_rows.pop();
4561                    break;
4562                }
4563                *rem -= 1;
4564            }
4565        }
4566        // Output column schema: derive from SELECT items.
4567        let columns = self.derive_output_columns(&stmt.items, schema_cols, alias);
4568        Ok(QueryResult::Rows {
4569            columns,
4570            rows: out_rows,
4571        })
4572    }
4573
4574    /// v6.10.2 — simple-path WHERE eval that doesn't go through
4575    /// the correlated-subquery / Memoize machinery. AS OF SEGMENT
4576    /// scan paths predicate against a snapshot frozen segment, no
4577    /// cross-row state.
4578    fn eval_expr_simple(
4579        &self,
4580        expr: &Expr,
4581        row: &Row,
4582        ctx: &EvalContext,
4583    ) -> Result<Value, EngineError> {
4584        let cancel = CancelToken::none();
4585        self.eval_expr_with_correlated(expr, row, ctx, cancel, None)
4586    }
4587
4588    /// v7.9.4 — INSERT / UPDATE / DELETE RETURNING projector.
4589    /// Given the table name, the user-supplied projection items,
4590    /// and the mutated rows (post-insert / post-update values, or
4591    /// pre-delete snapshot), build a `QueryResult::Rows` whose
4592    /// schema describes the projected columns. Mailrs migration
4593    /// blocker #1.
4594    fn build_returning_rows(
4595        &self,
4596        table_name: &str,
4597        items: &[SelectItem],
4598        mutated_rows: Vec<Vec<Value>>,
4599    ) -> Result<QueryResult, EngineError> {
4600        let table = self.active_catalog().get(table_name).ok_or_else(|| {
4601            EngineError::Storage(StorageError::TableNotFound {
4602                name: table_name.into(),
4603            })
4604        })?;
4605        let schema_cols = table.schema().columns.clone();
4606        let columns = self.derive_output_columns(items, &schema_cols, table_name);
4607        let mut out_rows: Vec<Row> = Vec::with_capacity(mutated_rows.len());
4608        for values in mutated_rows {
4609            let row = Row::new(values);
4610            let projected = self.project_row_simple(&row, items, &schema_cols, table_name)?;
4611            out_rows.push(projected);
4612        }
4613        Ok(QueryResult::Rows {
4614            columns,
4615            rows: out_rows,
4616        })
4617    }
4618
4619    /// v6.10.2 — projection for AS OF SEGMENT. Resolves
4620    /// `SelectItem::Wildcard` to all schema columns and
4621    /// `SelectItem::Expr` via the regular eval path.
4622    fn project_row_simple(
4623        &self,
4624        row: &Row,
4625        items: &[SelectItem],
4626        schema_cols: &[ColumnSchema],
4627        alias: &str,
4628    ) -> Result<Row, EngineError> {
4629        let ctx = EvalContext::new(schema_cols, Some(alias));
4630        let cancel = CancelToken::none();
4631        let mut out_vals = Vec::new();
4632        for item in items {
4633            match item {
4634                SelectItem::Wildcard => {
4635                    out_vals.extend(row.values.iter().cloned());
4636                }
4637                SelectItem::Expr { expr, .. } => {
4638                    let v = self.eval_expr_with_correlated(expr, row, &ctx, cancel, None)?;
4639                    out_vals.push(v);
4640                }
4641            }
4642        }
4643        Ok(Row::new(out_vals))
4644    }
4645
4646    /// v6.10.2 — derive the output `ColumnSchema` list for an
4647    /// AS OF SEGMENT projection. Wildcards take the full schema;
4648    /// expressions take the alias if present or a synthetic
4649    /// `?column?` (PG convention) otherwise.
4650    fn derive_output_columns(
4651        &self,
4652        items: &[SelectItem],
4653        schema_cols: &[ColumnSchema],
4654        _alias: &str,
4655    ) -> Vec<ColumnSchema> {
4656        let mut out = Vec::new();
4657        for item in items {
4658            match item {
4659                SelectItem::Wildcard => {
4660                    out.extend(schema_cols.iter().cloned());
4661                }
4662                SelectItem::Expr { alias, .. } => {
4663                    let name = alias.clone().unwrap_or_else(|| "?column?".to_string());
4664                    // Default to Text; the caller's row values
4665                    // carry the actual type. v6.10.2 scope.
4666                    out.push(ColumnSchema::new(name, DataType::Text, true));
4667                }
4668            }
4669        }
4670        out
4671    }
4672
4673    fn exec_select_cancel(
4674        &self,
4675        stmt: &SelectStatement,
4676        cancel: CancelToken<'_>,
4677    ) -> Result<QueryResult, EngineError> {
4678        cancel.check()?;
4679        // v6.10.2 — cold-tier time-travel short-circuit. When the
4680        // primary TableRef carries `AS OF SEGMENT '<id>'`, run a
4681        // dedicated cold-segment scan instead of the regular
4682        // hot+index path. The scope is intentionally narrow for
4683        // v6.10.2 — bare `SELECT * FROM <t> AS OF SEGMENT 'id'`,
4684        // optionally with a single-column-equality WHERE. JOINs /
4685        // aggregates / ORDER BY / subqueries on top of a time-
4686        // travelled scan are STABILITY § "Out of v6.10".
4687        if let Some(from) = &stmt.from
4688            && let Some(seg_id) = from.primary.as_of_segment
4689        {
4690            return self.exec_select_as_of_segment(stmt, from, seg_id);
4691        }
4692        // v6.2.0 / v6.5.0 — virtual-table short-circuits. Detected
4693        // pre-CTE because they don't read from the catalog and
4694        // shouldn't participate in regular FROM resolution.
4695        if let Some(from) = &stmt.from
4696            && from.joins.is_empty()
4697            && stmt.where_.is_none()
4698            && stmt.group_by.is_none()
4699            && stmt.having.is_none()
4700            && stmt.unions.is_empty()
4701            && stmt.order_by.is_empty()
4702            && stmt.limit.is_none()
4703            && stmt.offset.is_none()
4704            && !stmt.distinct
4705            && stmt.items.iter().all(|i| matches!(i, SelectItem::Wildcard))
4706        {
4707            let lower = from.primary.name.to_ascii_lowercase();
4708            match lower.as_str() {
4709                "spg_statistic" => return Ok(self.exec_spg_statistic()),
4710                // v6.5.0 — observability v2 virtual tables.
4711                "spg_stat_replication" => return Ok(self.exec_spg_stat_replication()),
4712                "spg_stat_segment" => return Ok(self.exec_spg_stat_segment()),
4713                "spg_stat_query" => return Ok(self.exec_spg_stat_query()),
4714                "spg_stat_activity" => return Ok(self.exec_spg_stat_activity()),
4715                "spg_audit_chain" => return Ok(self.exec_spg_audit_chain()),
4716                "spg_audit_verify" => return Ok(self.exec_spg_audit_verify()),
4717                "spg_table_ddl" => return Ok(self.exec_spg_table_ddl()),
4718                "spg_role_ddl" => return Ok(self.exec_spg_role_ddl()),
4719                "spg_database_ddl" => return Ok(self.exec_spg_database_ddl()),
4720                _ => {}
4721            }
4722        }
4723        // v4.11: CTEs materialise into a temporary enriched catalog
4724        // *before* anything else — the body SELECT can then refer
4725        // to CTE names via the regular FROM-clause resolution.
4726        // Uncorrelated only: each CTE body runs once against the
4727        // current catalog, not against later CTEs' results (left-
4728        // to-right materialisation would relax this, but we keep
4729        // it simple for v4.11 MVP).
4730        if !stmt.ctes.is_empty() {
4731            return self.exec_with_ctes(stmt, cancel);
4732        }
4733        // v4.10: subqueries (uncorrelated) are resolved here, before
4734        // the executor sees the row loop. We clone the statement so
4735        // we can mutate without disturbing the caller's AST — most
4736        // queries pass through with no subquery nodes and the clone
4737        // is cheap; with subqueries the materialisation cost
4738        // dominates anyway.
4739        let mut stmt_owned;
4740        let stmt_ref: &SelectStatement = if expr_tree_has_subquery(stmt) {
4741            stmt_owned = stmt.clone();
4742            self.resolve_select_subqueries(&mut stmt_owned, cancel)?;
4743            &stmt_owned
4744        } else {
4745            stmt
4746        };
4747        if stmt_ref.unions.is_empty() {
4748            return self.exec_bare_select_cancel(stmt_ref, cancel);
4749        }
4750        // UNION path: clone-strip the head into a bare block (its own
4751        // DISTINCT and any inner ORDER BY are dropped by parser rule —
4752        // the wrapper SelectStatement carries them), execute, then chain
4753        // peers with left-associative dedup semantics.
4754        let mut head = stmt_ref.clone();
4755        head.unions = Vec::new();
4756        head.order_by = Vec::new();
4757        head.limit = None;
4758        let QueryResult::Rows { columns, mut rows } =
4759            self.exec_bare_select_cancel(&head, cancel)?
4760        else {
4761            unreachable!("bare SELECT cannot return CommandOk")
4762        };
4763        for (kind, peer) in &stmt_ref.unions {
4764            let QueryResult::Rows {
4765                columns: peer_cols,
4766                rows: peer_rows,
4767            } = self.exec_bare_select_cancel(peer, cancel)?
4768            else {
4769                unreachable!("bare SELECT cannot return CommandOk")
4770            };
4771            if peer_cols.len() != columns.len() {
4772                return Err(EngineError::Unsupported(alloc::format!(
4773                    "UNION arity mismatch: head has {} columns, peer has {}",
4774                    columns.len(),
4775                    peer_cols.len()
4776                )));
4777            }
4778            rows.extend(peer_rows);
4779            if matches!(kind, UnionKind::Distinct) {
4780                rows = dedup_rows(rows);
4781            }
4782        }
4783        // ORDER BY at the top of a UNION applies to the combined result.
4784        // Eval against the projected schema (NOT the source table).
4785        if !stmt.order_by.is_empty() {
4786            let synth_ctx = EvalContext::new(&columns, None);
4787            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
4788            let mut tagged: Vec<(Vec<f64>, Row)> = Vec::with_capacity(rows.len());
4789            for r in rows {
4790                let keys = build_order_keys(&stmt.order_by, &r, &synth_ctx)?;
4791                tagged.push((keys, r));
4792            }
4793            sort_by_keys(&mut tagged, &descs);
4794            rows = tagged.into_iter().map(|(_, r)| r).collect();
4795        }
4796        apply_offset_and_limit(&mut rows, stmt.offset_literal(), stmt.limit_literal());
4797        Ok(QueryResult::Rows { columns, rows })
4798    }
4799
4800    #[allow(clippy::too_many_lines)]
4801    #[allow(clippy::too_many_lines)] // huge match — splitting fragments the planner
4802    /// v7.11.7 — execute `SELECT … FROM unnest(expr) [AS] alias …`.
4803    /// Synthesises a single-column virtual table whose column type
4804    /// is TEXT and whose rows are the array elements. Routes
4805    /// through the regular projection / WHERE / ORDER BY / LIMIT
4806    /// machinery so set-returning UNNEST composes naturally with
4807    /// the rest of the SELECT surface.
4808    fn exec_select_unnest(
4809        &self,
4810        stmt: &SelectStatement,
4811        primary: &TableRef,
4812        cancel: CancelToken<'_>,
4813    ) -> Result<QueryResult, EngineError> {
4814        let expr = primary
4815            .unnest_expr
4816            .as_deref()
4817            .expect("caller guards unnest_expr.is_some()");
4818        // Evaluate the array expression once. Empty schema / empty
4819        // row — uncorrelated UNNEST cannot reference outer columns.
4820        let empty_schema: alloc::vec::Vec<ColumnSchema> = alloc::vec::Vec::new();
4821        let ctx = EvalContext::new(&empty_schema, None);
4822        let dummy_row = Row::new(alloc::vec::Vec::new());
4823        // v7.11.13 — unnest dispatches per array element type so
4824        // INT[] / BIGINT[] surface their PG types in projection.
4825        let (elem_dtype, rows): (DataType, alloc::vec::Vec<Row>) =
4826            match eval::eval_expr(expr, &dummy_row, &ctx).map_err(EngineError::Eval)? {
4827                Value::Null => (DataType::Text, alloc::vec::Vec::new()),
4828                Value::TextArray(items) => {
4829                    let rows = items
4830                        .into_iter()
4831                        .map(|item| {
4832                            Row::new(alloc::vec![match item {
4833                                Some(s) => Value::Text(s),
4834                                None => Value::Null,
4835                            }])
4836                        })
4837                        .collect();
4838                    (DataType::Text, rows)
4839                }
4840                Value::IntArray(items) => {
4841                    let rows = items
4842                        .into_iter()
4843                        .map(|item| {
4844                            Row::new(alloc::vec![match item {
4845                                Some(n) => Value::Int(n),
4846                                None => Value::Null,
4847                            }])
4848                        })
4849                        .collect();
4850                    (DataType::Int, rows)
4851                }
4852                Value::BigIntArray(items) => {
4853                    let rows = items
4854                        .into_iter()
4855                        .map(|item| {
4856                            Row::new(alloc::vec![match item {
4857                                Some(n) => Value::BigInt(n),
4858                                None => Value::Null,
4859                            }])
4860                        })
4861                        .collect();
4862                    (DataType::BigInt, rows)
4863                }
4864                other => {
4865                    return Err(EngineError::Unsupported(alloc::format!(
4866                        "unnest() expects an array argument, got {:?}",
4867                        other.data_type()
4868                    )));
4869                }
4870            };
4871        let alias = primary
4872            .alias
4873            .clone()
4874            .unwrap_or_else(|| "unnest".to_string());
4875        // v7.13.2 — mailrs round-6 S5. Honour PG-standard
4876        // `UNNEST(arr) AS p(col_name)` column-list aliasing: the
4877        // first entry overrides the projected column's name.
4878        // Without the column list, fall back to the table alias
4879        // (pre-v7.13.2 behaviour).
4880        let col_name = primary
4881            .unnest_column_aliases
4882            .first()
4883            .cloned()
4884            .unwrap_or_else(|| alias.clone());
4885        let col_schema = ColumnSchema::new(col_name, elem_dtype, true);
4886        let schema_cols = alloc::vec![col_schema.clone()];
4887        let scan_ctx = EvalContext::new(&schema_cols, Some(&alias));
4888        // Apply WHERE.
4889        let filtered: alloc::vec::Vec<Row> = if let Some(w) = &stmt.where_ {
4890            let mut out = alloc::vec::Vec::with_capacity(rows.len());
4891            for row in rows {
4892                cancel.check()?;
4893                let v = eval::eval_expr(w, &row, &scan_ctx).map_err(EngineError::Eval)?;
4894                if matches!(v, Value::Bool(true)) {
4895                    out.push(row);
4896                }
4897            }
4898            out
4899        } else {
4900            rows
4901        };
4902        // Projection.
4903        let projection = build_projection(&stmt.items, &schema_cols, &alias)?;
4904        let mut projected_rows: alloc::vec::Vec<Row> =
4905            alloc::vec::Vec::with_capacity(filtered.len());
4906        for row in &filtered {
4907            let mut vals = alloc::vec::Vec::with_capacity(projection.len());
4908            for p in &projection {
4909                vals.push(eval::eval_expr(&p.expr, row, &scan_ctx).map_err(EngineError::Eval)?);
4910            }
4911            projected_rows.push(Row::new(vals));
4912        }
4913        // ORDER BY / LIMIT — apply on the projected rows (cheap;
4914        // unnest result sets are small by design).
4915        let columns: alloc::vec::Vec<ColumnSchema> = projection
4916            .iter()
4917            .map(|p| ColumnSchema::new(p.output_name.clone(), p.ty, p.nullable))
4918            .collect();
4919        // Re-evaluate ORDER BY against the source schema (pre-projection
4920        // so col refs by name still resolve through `scan_ctx`).
4921        if !stmt.order_by.is_empty() {
4922            let mut indexed: alloc::vec::Vec<(usize, Vec<Value>)> = filtered
4923                .iter()
4924                .enumerate()
4925                .map(|(i, r)| -> Result<_, EngineError> {
4926                    let keys: Result<Vec<Value>, EngineError> = stmt
4927                        .order_by
4928                        .iter()
4929                        .map(|ob| {
4930                            eval::eval_expr(&ob.expr, r, &scan_ctx).map_err(EngineError::Eval)
4931                        })
4932                        .collect();
4933                    Ok((i, keys?))
4934                })
4935                .collect::<Result<_, _>>()?;
4936            indexed.sort_by(|a, b| {
4937                for (idx, (ka, kb)) in a.1.iter().zip(b.1.iter()).enumerate() {
4938                    let mut cmp = value_cmp(ka, kb);
4939                    if stmt.order_by[idx].desc {
4940                        cmp = cmp.reverse();
4941                    }
4942                    if cmp != core::cmp::Ordering::Equal {
4943                        return cmp;
4944                    }
4945                }
4946                core::cmp::Ordering::Equal
4947            });
4948            projected_rows = indexed
4949                .into_iter()
4950                .map(|(i, _)| projected_rows[i].clone())
4951                .collect();
4952        }
4953        // LIMIT / OFFSET — apply at the tail.
4954        if let Some(offset) = stmt.offset_literal() {
4955            let off = (offset as usize).min(projected_rows.len());
4956            projected_rows.drain(..off);
4957        }
4958        if let Some(limit) = stmt.limit_literal() {
4959            projected_rows.truncate(limit as usize);
4960        }
4961        Ok(QueryResult::Rows {
4962            columns,
4963            rows: projected_rows,
4964        })
4965    }
4966
4967    fn exec_bare_select_cancel(
4968        &self,
4969        stmt: &SelectStatement,
4970        cancel: CancelToken<'_>,
4971    ) -> Result<QueryResult, EngineError> {
4972        // v4.12: window-function path. When the projection contains
4973        // any `name(args) OVER (...)` we route to the dedicated
4974        // executor — partition + sort + per-row window value before
4975        // the regular projection.
4976        if select_has_window(stmt) {
4977            return self.exec_select_with_window(stmt, cancel);
4978        }
4979        // Constant SELECT (no FROM) — evaluate each item once against an
4980        // empty dummy row. Useful for `SELECT 1`, `SELECT coalesce(...)`,
4981        // `SELECT '7'::INT`. Column references will surface as
4982        // ColumnNotFound on eval since the schema is empty.
4983        let Some(from) = &stmt.from else {
4984            let empty_schema: Vec<ColumnSchema> = Vec::new();
4985            let ctx = self.ev_ctx(&empty_schema, None);
4986            let projection = build_projection(&stmt.items, &empty_schema, "")?;
4987            let dummy_row = Row::new(Vec::new());
4988            let mut values = Vec::with_capacity(projection.len());
4989            for p in &projection {
4990                values.push(eval::eval_expr(&p.expr, &dummy_row, &ctx)?);
4991            }
4992            let columns: Vec<ColumnSchema> = projection
4993                .into_iter()
4994                .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
4995                .collect();
4996            return Ok(QueryResult::Rows {
4997                columns,
4998                rows: alloc::vec![Row::new(values)],
4999            });
5000        };
5001        // Multi-table FROM (one or more joined peers) goes through the
5002        // nested-loop join executor. Single-table FROM stays on the
5003        // existing scan + index-seek path.
5004        if !from.joins.is_empty() {
5005            return self.exec_joined_select(stmt, from);
5006        }
5007        // v7.11.7 — `FROM unnest(<expr>) [AS] <alias>`. Synthesise a
5008        // single-column table at SELECT entry by evaluating the
5009        // expression once against the empty row (UNNEST is
5010        // uncorrelated in v7.11; correlated / LATERAL unnest is a
5011        // v7.12 carve-out). Build a virtual `Table` in a heap-only
5012        // catalog, then route to the regular scan path.
5013        if from.primary.unnest_expr.is_some() {
5014            return self.exec_select_unnest(stmt, &from.primary, cancel);
5015        }
5016        let primary = &from.primary;
5017        let table = self.active_catalog().get(&primary.name).ok_or_else(|| {
5018            StorageError::TableNotFound {
5019                name: primary.name.clone(),
5020            }
5021        })?;
5022        let schema_cols = &table.schema().columns;
5023        // The qualifier accepted on column refs is the alias (if any) else the
5024        // bare table name.
5025        let alias = primary.alias.as_deref().unwrap_or(primary.name.as_str());
5026        let ctx = self.ev_ctx(schema_cols, Some(alias));
5027
5028        // NSW kNN planner: `ORDER BY col <-> literal LIMIT k` with no
5029        // WHERE and an NSW index on `col` skips the full scan. The
5030        // walk returns rows already in ascending-distance order, so
5031        // ORDER BY / LIMIT are honoured implicitly.
5032        if let Some(nsw_rows) = try_nsw_knn(stmt, table, schema_cols, alias) {
5033            return materialise_in_order(stmt, table, schema_cols, alias, &nsw_rows);
5034        }
5035
5036        // Index seek: if WHERE is `col = literal` (or commuted) and the
5037        // referenced column has an index, dispatch each locator through
5038        // the catalog (hot tier → borrow, cold tier → page-read +
5039        // decode) and iterate just those rows. Otherwise fall back to a
5040        // full scan over the hot tier (cold-tier rows are only reached
5041        // via index seek in v5.1 — full table scans against cold-tier
5042        // data ship in v5.2 with the freezer's per-segment scan API).
5043        let indexed_rows: Option<Vec<Cow<'_, Row>>> = stmt.where_.as_ref().and_then(|w| {
5044            // BTree / col=literal seek first — covers the v7.11.3 multi-
5045            // column AND case and the leading-column equality lookup.
5046            try_index_seek(w, schema_cols, self.active_catalog(), table, alias).or_else(|| {
5047                // v7.12.3 — GIN-accelerated `WHERE col @@ tsquery`
5048                // when the column has a `USING gin` index. Returns an
5049                // over-approximate candidate set; the WHERE re-eval
5050                // loop below verifies the full `@@` predicate per row.
5051                try_gin_seek(w, schema_cols, self.active_catalog(), table, alias, &ctx)
5052            })
5053        });
5054
5055        // Aggregate path: filter rows first, then hand off to the
5056        // aggregate executor which does its own projection + ORDER BY.
5057        if aggregate::uses_aggregate(stmt) {
5058            let mut filtered: Vec<&Row> = Vec::new();
5059            // v6.2.6 — Memoize: per-query LRU cache for correlated
5060            // scalar subqueries. Fresh per row-loop entry so each
5061            // SELECT execution gets an isolated cache.
5062            let mut memo = memoize::MemoizeCache::new();
5063            if let Some(rows) = &indexed_rows {
5064                for cow in rows {
5065                    let row = cow.as_ref();
5066                    if let Some(where_expr) = &stmt.where_ {
5067                        let cond = self.eval_expr_with_correlated(
5068                            where_expr,
5069                            row,
5070                            &ctx,
5071                            cancel,
5072                            Some(&mut memo),
5073                        )?;
5074                        if !matches!(cond, Value::Bool(true)) {
5075                            continue;
5076                        }
5077                    }
5078                    filtered.push(row);
5079                }
5080            } else {
5081                for i in 0..table.row_count() {
5082                    let row = &table.rows()[i];
5083                    if let Some(where_expr) = &stmt.where_ {
5084                        let cond = self.eval_expr_with_correlated(
5085                            where_expr,
5086                            row,
5087                            &ctx,
5088                            cancel,
5089                            Some(&mut memo),
5090                        )?;
5091                        if !matches!(cond, Value::Bool(true)) {
5092                            continue;
5093                        }
5094                    }
5095                    filtered.push(row);
5096                }
5097            }
5098            let mut agg = aggregate::run(stmt, &filtered, schema_cols, Some(alias))?;
5099            apply_offset_and_limit(&mut agg.rows, stmt.offset_literal(), stmt.limit_literal());
5100            return Ok(QueryResult::Rows {
5101                columns: agg.columns,
5102                rows: agg.rows,
5103            });
5104        }
5105
5106        let projection = build_projection(&stmt.items, schema_cols, alias)?;
5107
5108        // Materialise the filter pass into `(order_key, projected_row)`
5109        // tuples. The order key is `None` when there's no ORDER BY clause.
5110        let mut tagged: Vec<(Vec<f64>, Row)> = Vec::new();
5111        // v6.2.6 — Memoize per-row WHERE eval shares one cache.
5112        let mut memo = memoize::MemoizeCache::new();
5113        // Inline the per-row work in a closure so the indexed and full-
5114        // scan branches share the body.
5115        let mut process_row = |row: &Row, loop_idx: usize| -> Result<(), EngineError> {
5116            if loop_idx.is_multiple_of(256) {
5117                cancel.check()?;
5118            }
5119            if let Some(where_expr) = &stmt.where_ {
5120                let cond =
5121                    self.eval_expr_with_correlated(where_expr, row, &ctx, cancel, Some(&mut memo))?;
5122                if !matches!(cond, Value::Bool(true)) {
5123                    return Ok(());
5124                }
5125            }
5126            let mut values = Vec::with_capacity(projection.len());
5127            for p in &projection {
5128                values.push(eval::eval_expr(&p.expr, row, &ctx)?);
5129            }
5130            let order_keys = if stmt.order_by.is_empty() {
5131                Vec::new()
5132            } else {
5133                build_order_keys(&stmt.order_by, row, &ctx)?
5134            };
5135            tagged.push((order_keys, Row::new(values)));
5136            Ok(())
5137        };
5138        if let Some(rows) = &indexed_rows {
5139            for (loop_idx, cow) in rows.iter().enumerate() {
5140                process_row(cow.as_ref(), loop_idx)?;
5141            }
5142        } else {
5143            for i in 0..table.row_count() {
5144                process_row(&table.rows()[i], i)?;
5145            }
5146        }
5147
5148        if !stmt.order_by.is_empty() {
5149            // Partial-sort fast path: when LIMIT is small relative to
5150            // the row count, select_nth_unstable + sort just the
5151            // prefix is O(n + k log k) instead of O(n log n). DISTINCT
5152            // requires the full sort because de-dup happens after.
5153            let keep = if stmt.distinct {
5154                None
5155            } else {
5156                stmt.limit_literal()
5157                    .map(|l| l as usize + stmt.offset_literal().map_or(0, |o| o as usize))
5158            };
5159            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
5160            partial_sort_tagged(&mut tagged, keep, &descs);
5161        }
5162
5163        let mut output_rows: Vec<Row> = tagged.into_iter().map(|(_, r)| r).collect();
5164        if stmt.distinct {
5165            output_rows = dedup_rows(output_rows);
5166        }
5167        apply_offset_and_limit(
5168            &mut output_rows,
5169            stmt.offset_literal(),
5170            stmt.limit_literal(),
5171        );
5172
5173        let columns: Vec<ColumnSchema> = projection
5174            .into_iter()
5175            .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
5176            .collect();
5177
5178        Ok(QueryResult::Rows {
5179            columns,
5180            rows: output_rows,
5181        })
5182    }
5183
5184    /// Multi-table SELECT executor (one or more JOIN peers).
5185    ///
5186    /// v1.10 builds the joined row set up-front via nested-loop joins,
5187    /// then runs WHERE + projection + ORDER BY against the combined
5188    /// rows. No index seek. Aggregates and DISTINCT still work because
5189    /// the executor delegates projection through the same shared paths.
5190    #[allow(clippy::too_many_lines)]
5191    /// v7.13.2 — mailrs round-6 S5. Resolve a TableRef into an
5192    /// owned (rows, schema) pair. Catalog tables clone their hot
5193    /// rows + schema; UNNEST table refs evaluate their array
5194    /// expression once and synthesise a single-column row set
5195    /// using the same dispatch as `exec_select_unnest`. Used by
5196    /// the joined-select path so UNNEST can appear in any FROM
5197    /// position, not just as the primary.
5198    fn materialise_table_ref(
5199        &self,
5200        tref: &TableRef,
5201    ) -> Result<(Vec<Row>, Vec<ColumnSchema>), EngineError> {
5202        if let Some(expr) = tref.unnest_expr.as_deref() {
5203            let empty_schema: Vec<ColumnSchema> = Vec::new();
5204            let ctx = EvalContext::new(&empty_schema, None);
5205            let dummy_row = Row::new(Vec::new());
5206            let (elem_dtype, rows) =
5207                match eval::eval_expr(expr, &dummy_row, &ctx).map_err(EngineError::Eval)? {
5208                    Value::Null => (DataType::Text, Vec::new()),
5209                    Value::TextArray(items) => (
5210                        DataType::Text,
5211                        items
5212                            .into_iter()
5213                            .map(|item| {
5214                                Row::new(alloc::vec![match item {
5215                                    Some(s) => Value::Text(s),
5216                                    None => Value::Null,
5217                                }])
5218                            })
5219                            .collect(),
5220                    ),
5221                    Value::IntArray(items) => (
5222                        DataType::Int,
5223                        items
5224                            .into_iter()
5225                            .map(|item| {
5226                                Row::new(alloc::vec![match item {
5227                                    Some(n) => Value::Int(n),
5228                                    None => Value::Null,
5229                                }])
5230                            })
5231                            .collect(),
5232                    ),
5233                    Value::BigIntArray(items) => (
5234                        DataType::BigInt,
5235                        items
5236                            .into_iter()
5237                            .map(|item| {
5238                                Row::new(alloc::vec![match item {
5239                                    Some(n) => Value::BigInt(n),
5240                                    None => Value::Null,
5241                                }])
5242                            })
5243                            .collect(),
5244                    ),
5245                    other => {
5246                        return Err(EngineError::Unsupported(alloc::format!(
5247                            "unnest() expects an array argument, got {:?}",
5248                            other.data_type()
5249                        )));
5250                    }
5251                };
5252            let alias = tref.alias.clone().unwrap_or_else(|| "unnest".to_string());
5253            let col_name = tref
5254                .unnest_column_aliases
5255                .first()
5256                .cloned()
5257                .unwrap_or(alias);
5258            return Ok((rows, alloc::vec![ColumnSchema::new(col_name, elem_dtype, true)]));
5259        }
5260        let table = self
5261            .active_catalog()
5262            .get(&tref.name)
5263            .ok_or_else(|| StorageError::TableNotFound {
5264                name: tref.name.clone(),
5265            })?;
5266        let rows: Vec<Row> = table.rows().iter().cloned().collect();
5267        let cols = table.schema().columns.clone();
5268        Ok((rows, cols))
5269    }
5270
5271    fn exec_joined_select(
5272        &self,
5273        stmt: &SelectStatement,
5274        from: &FromClause,
5275    ) -> Result<QueryResult, EngineError> {
5276        // v7.13.2 — mailrs round-6 S5. UNNEST peers materialise
5277        // into virtual (rows, schema) sources alongside catalog
5278        // tables, so `FROM t, UNNEST(arr) AS p(col)` works in
5279        // any join-list position. The lookup helper handles both
5280        // shapes uniformly.
5281        let (primary_rows, primary_cols) = self.materialise_table_ref(&from.primary)?;
5282        let primary_alias = from
5283            .primary
5284            .alias
5285            .as_deref()
5286            .unwrap_or(from.primary.name.as_str())
5287            .to_string();
5288        // Owned (rows, schema) per peer — borrows from the catalog
5289        // would not survive UNNEST-side materialisation.
5290        let mut joined: Vec<(Vec<Row>, Vec<ColumnSchema>, String, JoinKind, Option<&Expr>)> =
5291            Vec::new();
5292        for j in &from.joins {
5293            let (rows, cols) = self.materialise_table_ref(&j.table)?;
5294            let a = j
5295                .table
5296                .alias
5297                .as_deref()
5298                .unwrap_or(j.table.name.as_str())
5299                .to_string();
5300            joined.push((rows, cols, a, j.kind, j.on.as_ref()));
5301        }
5302
5303        // Build the combined schema: composite "alias.col" names so the
5304        // qualified-column resolver can find anything by exact match.
5305        let mut combined_schema: Vec<ColumnSchema> = Vec::new();
5306        for col in &primary_cols {
5307            combined_schema.push(ColumnSchema::new(
5308                alloc::format!("{primary_alias}.{}", col.name),
5309                col.ty,
5310                col.nullable,
5311            ));
5312        }
5313        for (_, cols, a, _, _) in &joined {
5314            for col in cols {
5315                combined_schema.push(ColumnSchema::new(
5316                    alloc::format!("{a}.{}", col.name),
5317                    col.ty,
5318                    col.nullable,
5319                ));
5320            }
5321        }
5322        let ctx = EvalContext::new(&combined_schema, None);
5323
5324        // Nested-loop join.
5325        let mut working: Vec<Row> = primary_rows;
5326        let mut produced_len = primary_cols.len();
5327        for (rrows, rcols, _, kind, on) in &joined {
5328            let right_arity = rcols.len();
5329            let mut next: Vec<Row> = Vec::new();
5330            for left in &working {
5331                let mut left_matched = false;
5332                for right in rrows {
5333                    let mut combined_vals = left.values.clone();
5334                    combined_vals.extend(right.values.iter().cloned());
5335                    // Pad combined to the eventual full width so the
5336                    // partial schema still matches positions used by ON.
5337                    let combined = Row::new(combined_vals);
5338                    let keep = if let Some(on_expr) = on {
5339                        let cond = eval::eval_expr(on_expr, &combined, &ctx)?;
5340                        matches!(cond, Value::Bool(true))
5341                    } else {
5342                        // CROSS / comma-list: every pair survives.
5343                        true
5344                    };
5345                    if keep {
5346                        next.push(combined);
5347                        left_matched = true;
5348                    }
5349                }
5350                if !left_matched && matches!(kind, JoinKind::Left) {
5351                    // LEFT OUTER JOIN: emit the left row with NULLs on
5352                    // the right side when no peer matched.
5353                    let mut combined_vals = left.values.clone();
5354                    for _ in 0..right_arity {
5355                        combined_vals.push(Value::Null);
5356                    }
5357                    next.push(Row::new(combined_vals));
5358                }
5359            }
5360            working = next;
5361            produced_len += right_arity;
5362            debug_assert!(produced_len <= combined_schema.len());
5363        }
5364
5365        // WHERE filter against combined rows.
5366        let mut filtered: Vec<Row> = Vec::new();
5367        for row in working {
5368            if let Some(where_expr) = &stmt.where_ {
5369                let cond = eval::eval_expr(where_expr, &row, &ctx)?;
5370                if !matches!(cond, Value::Bool(true)) {
5371                    continue;
5372                }
5373            }
5374            filtered.push(row);
5375        }
5376
5377        // Aggregate path: handle GROUP BY / aggregate calls over the
5378        // joined+filtered rows.
5379        if aggregate::uses_aggregate(stmt) {
5380            let refs: Vec<&Row> = filtered.iter().collect();
5381            let mut agg = aggregate::run(stmt, &refs, &combined_schema, None)?;
5382            apply_offset_and_limit(&mut agg.rows, stmt.offset_literal(), stmt.limit_literal());
5383            return Ok(QueryResult::Rows {
5384                columns: agg.columns,
5385                rows: agg.rows,
5386            });
5387        }
5388
5389        let projection = build_projection(&stmt.items, &combined_schema, "")?;
5390        let mut tagged: Vec<(Vec<f64>, Row)> = Vec::new();
5391        for row in &filtered {
5392            let mut values = Vec::with_capacity(projection.len());
5393            for p in &projection {
5394                values.push(eval::eval_expr(&p.expr, row, &ctx)?);
5395            }
5396            let order_keys = if stmt.order_by.is_empty() {
5397                Vec::new()
5398            } else {
5399                build_order_keys(&stmt.order_by, row, &ctx)?
5400            };
5401            tagged.push((order_keys, Row::new(values)));
5402        }
5403        if !stmt.order_by.is_empty() {
5404            let keep = if stmt.distinct {
5405                None
5406            } else {
5407                stmt.limit_literal()
5408                    .map(|l| l as usize + stmt.offset_literal().map_or(0, |o| o as usize))
5409            };
5410            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
5411            partial_sort_tagged(&mut tagged, keep, &descs);
5412        }
5413        let mut output_rows: Vec<Row> = tagged.into_iter().map(|(_, r)| r).collect();
5414        if stmt.distinct {
5415            output_rows = dedup_rows(output_rows);
5416        }
5417        apply_offset_and_limit(
5418            &mut output_rows,
5419            stmt.offset_literal(),
5420            stmt.limit_literal(),
5421        );
5422        let columns: Vec<ColumnSchema> = projection
5423            .into_iter()
5424            .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
5425            .collect();
5426        Ok(QueryResult::Rows {
5427            columns,
5428            rows: output_rows,
5429        })
5430    }
5431}
5432
5433/// One row-producing projection: an expression to evaluate, the resulting
5434/// column's user-visible name, its inferred type, and nullability.
5435#[derive(Debug, Clone)]
5436struct ProjectedItem {
5437    expr: Expr,
5438    output_name: String,
5439    ty: DataType,
5440    nullable: bool,
5441}
5442
5443/// Dedupe a row set, preserving first-seen order. `Row`'s `PartialEq` is
5444/// structural (`Vec<Value>` ⇒ pairwise `Value` equality), which gives SQL
5445/// `NULL = NULL → TRUE` and `NaN = NaN → FALSE`. The first agrees with
5446/// the spec's "two NULLs are not distinct"; the second is a tolerated
5447/// quirk for v1 (no NaN literals are reachable from the SQL surface).
5448fn dedup_rows(rows: Vec<Row>) -> Vec<Row> {
5449    let mut out: Vec<Row> = Vec::with_capacity(rows.len());
5450    for r in rows {
5451        if !out.iter().any(|seen| seen == &r) {
5452            out.push(r);
5453        }
5454    }
5455    out
5456}
5457
5458/// Coerce a `Value` to an `f64` sort key for ORDER BY. Numbers map directly;
5459/// NULL sorts last (treated as `+∞`); booleans are 0.0 / 1.0; text uses lex
5460/// order via the byte values; vectors are not sortable.
5461fn value_to_order_key(v: &Value) -> Result<f64, EngineError> {
5462    match v {
5463        Value::Null => Ok(f64::INFINITY),
5464        Value::SmallInt(n) => Ok(f64::from(*n)),
5465        Value::Int(n) => Ok(f64::from(*n)),
5466        Value::Date(d) => Ok(f64::from(*d)),
5467        #[allow(clippy::cast_precision_loss)]
5468        Value::Timestamp(t) => Ok(*t as f64),
5469        #[allow(clippy::cast_precision_loss)]
5470        Value::Numeric { scaled, scale } => {
5471            // Scaled integer / 10^scale, computed via f64 for sort
5472            // ordering only. Precision losses here only matter for
5473            // ORDER BY tie-breaks well past 15 significant digits.
5474            // `f64::powi` lives in std; we hand-roll the loop so the
5475            // no_std engine crate doesn't need it.
5476            let mut divisor = 1.0_f64;
5477            for _ in 0..*scale {
5478                divisor *= 10.0;
5479            }
5480            Ok((*scaled as f64) / divisor)
5481        }
5482        #[allow(clippy::cast_precision_loss)]
5483        Value::BigInt(n) => Ok(*n as f64),
5484        Value::Float(x) => Ok(*x),
5485        Value::Bool(b) => Ok(if *b { 1.0 } else { 0.0 }),
5486        Value::Text(s) => {
5487            // Lex order by codepoints — good enough for ORDER BY name.
5488            // Map first 8 bytes packed into u64 as a coarse key; ties fall to
5489            // partial_cmp Equal. v1.x can swap in a real string comparator.
5490            let mut key: u64 = 0;
5491            for &b in s.as_bytes().iter().take(8) {
5492                key = (key << 8) | u64::from(b);
5493            }
5494            #[allow(clippy::cast_precision_loss)]
5495            Ok(key as f64)
5496        }
5497        Value::Vector(_) | Value::Sq8Vector(_) | Value::HalfVector(_) => {
5498            Err(EngineError::Unsupported(
5499                "ORDER BY of a raw vector column is not meaningful — use `<->`".into(),
5500            ))
5501        }
5502        Value::Interval { .. } => Err(EngineError::Unsupported(
5503            "ORDER BY of an INTERVAL is not supported in v2.11 \
5504             (months vs micros has no single canonical ordering)"
5505                .into(),
5506        )),
5507        Value::Json(_) => Err(EngineError::Unsupported(
5508            "ORDER BY of a JSON value is not supported — cast the document to text first".into(),
5509        )),
5510        // v7.5.0 — Value is #[non_exhaustive]; future variants need
5511        // an explicit ORDER BY mapping. Surface as Unsupported until
5512        // engine support is added.
5513        _ => Err(EngineError::Unsupported(
5514            "ORDER BY of this value type is not supported".into(),
5515        )),
5516    }
5517}
5518
5519/// Try to plan a WHERE clause as an equality lookup against an existing
5520/// index. Returns the candidate row indices on success; `None` means the
5521/// caller should fall back to a full scan.
5522///
5523/// v0.8 recognises a single top-level `col = literal` (in either operand
5524/// order). AND chains and range scans land in later milestones.
5525/// Look for `ORDER BY col <dist-op> literal LIMIT k` against an
5526/// NSW-indexed vector column. Recognised distance ops: `<->` (L2),
5527/// `<#>` (inner product), `<=>` (cosine). When a WHERE clause is
5528/// present, the planner does an "over-fetch and filter" pass — it
5529/// asks the graph for `k * over_fetch` candidates, evaluates WHERE
5530/// against each, and trims back to `k`. Returns the row indices in
5531/// ascending-distance order when the plan applies.
5532fn try_nsw_knn(
5533    stmt: &SelectStatement,
5534    table: &Table,
5535    schema_cols: &[ColumnSchema],
5536    table_alias: &str,
5537) -> Option<Vec<usize>> {
5538    if stmt.distinct {
5539        return None;
5540    }
5541    let limit = usize::try_from(stmt.limit_literal()?).ok()?;
5542    if limit == 0 {
5543        return None;
5544    }
5545    // v6.4.0 — NSW kNN dispatch needs a single ORDER BY key on the
5546    // distance metric. Multi-key ORDER BY falls through to the
5547    // generic sort path.
5548    if stmt.order_by.len() != 1 {
5549        return None;
5550    }
5551    let order = &stmt.order_by[0];
5552    // NSW kNN returns rows ascending by distance — DESC inverts the
5553    // natural order, so the planner can't handle it without a sort
5554    // pass. Fall back to the generic ORDER BY path.
5555    if order.desc {
5556        return None;
5557    }
5558    let Expr::Binary { lhs, op, rhs } = &order.expr else {
5559        return None;
5560    };
5561    let metric = match op {
5562        BinOp::L2Distance => spg_storage::NswMetric::L2,
5563        BinOp::InnerProduct => spg_storage::NswMetric::InnerProduct,
5564        BinOp::CosineDistance => spg_storage::NswMetric::Cosine,
5565        _ => return None,
5566    };
5567    // Accept both `col <op> literal` and `literal <op> col`.
5568    let ((Expr::Column(col), literal) | (literal, Expr::Column(col))) =
5569        (lhs.as_ref(), rhs.as_ref())
5570    else {
5571        return None;
5572    };
5573    if let Some(q) = &col.qualifier
5574        && q != table_alias
5575    {
5576        return None;
5577    }
5578    let col_pos = schema_cols.iter().position(|s| s.name == col.name)?;
5579    let query = literal_to_vector(literal)?;
5580    let idx = spg_storage::nsw_index_on(table, col_pos)?;
5581    if let Some(where_expr) = &stmt.where_ {
5582        // Over-fetch and filter. The factor (10×) is a heuristic that
5583        // covers typical selectivity for the corpus tests; v2.x will
5584        // make it configurable.
5585        let over_fetch = limit.saturating_mul(10).max(NSW_OVER_FETCH_FLOOR);
5586        let candidates = spg_storage::nsw_query(table, &idx.name, &query, over_fetch, metric);
5587        let ctx = EvalContext::new(schema_cols, Some(table_alias));
5588        let mut kept: Vec<usize> = Vec::with_capacity(limit);
5589        for i in candidates {
5590            let row = &table.rows()[i];
5591            let cond = eval::eval_expr(where_expr, row, &ctx).ok()?;
5592            if matches!(cond, Value::Bool(true)) {
5593                kept.push(i);
5594                if kept.len() >= limit {
5595                    break;
5596                }
5597            }
5598        }
5599        Some(kept)
5600    } else {
5601        Some(spg_storage::nsw_query(
5602            table, &idx.name, &query, limit, metric,
5603        ))
5604    }
5605}
5606
/// Minimum number of candidates requested from the NSW graph when a
/// WHERE clause has to be applied afterwards. Keeps the candidate pool
/// large enough to absorb a few WHERE rejections even for tiny
/// `LIMIT 1` queries.
const NSW_OVER_FETCH_FLOOR: usize = 32;
5611
5612/// Pull a `Vec<f32>` out of a literal-or-cast expression. Returns
5613/// `None` for anything we can't fold at plan time.
5614fn literal_to_vector(e: &Expr) -> Option<Vec<f32>> {
5615    match e {
5616        Expr::Literal(Literal::Vector(v)) => Some(v.clone()),
5617        Expr::Cast { expr, .. } => literal_to_vector(expr),
5618        _ => None,
5619    }
5620}
5621
5622/// Materialise rows in a planner-supplied order (used by the NSW path)
5623/// without re-running ORDER BY. The projection + LIMIT slot mirror the
5624/// equivalent block in `exec_bare_select`.
5625fn materialise_in_order(
5626    stmt: &SelectStatement,
5627    table: &Table,
5628    schema_cols: &[ColumnSchema],
5629    table_alias: &str,
5630    ordered_rows: &[usize],
5631) -> Result<QueryResult, EngineError> {
5632    let ctx = EvalContext::new(schema_cols, Some(table_alias));
5633    let projection = build_projection(&stmt.items, schema_cols, table_alias)?;
5634    let mut output_rows: Vec<Row> = Vec::with_capacity(ordered_rows.len());
5635    for &i in ordered_rows {
5636        let row = &table.rows()[i];
5637        let mut values = Vec::with_capacity(projection.len());
5638        for p in &projection {
5639            values.push(eval::eval_expr(&p.expr, row, &ctx)?);
5640        }
5641        output_rows.push(Row::new(values));
5642    }
5643    apply_offset_and_limit(
5644        &mut output_rows,
5645        stmt.offset_literal(),
5646        stmt.limit_literal(),
5647    );
5648    let columns: Vec<ColumnSchema> = projection
5649        .into_iter()
5650        .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
5651        .collect();
5652    Ok(QueryResult::Rows {
5653        columns,
5654        rows: output_rows,
5655    })
5656}
5657
5658fn try_index_seek<'a>(
5659    where_expr: &Expr,
5660    schema_cols: &[ColumnSchema],
5661    catalog: &'a Catalog,
5662    table: &'a Table,
5663    table_alias: &str,
5664) -> Option<Vec<Cow<'a, Row>>> {
5665    // v7.11.3 — recurse through top-level `AND` so a PG-style
5666    // composite predicate like `WHERE id = 1 AND created_at > $1`
5667    // still hits the index on `id`. The caller re-applies the
5668    // full WHERE expression to each returned row, so dropping the
5669    // residual conjuncts here is correct — the index just narrows
5670    // the candidate set.
5671    if let Expr::Binary {
5672        lhs,
5673        op: BinOp::And,
5674        rhs,
5675    } = where_expr
5676    {
5677        // Try LHS first (typical convention: leading equality on
5678        // the indexed column comes first in user-written SQL).
5679        if let Some(rows) = try_index_seek(lhs, schema_cols, catalog, table, table_alias) {
5680            return Some(rows);
5681        }
5682        return try_index_seek(rhs, schema_cols, catalog, table, table_alias);
5683    }
5684    let Expr::Binary {
5685        lhs,
5686        op: BinOp::Eq,
5687        rhs,
5688    } = where_expr
5689    else {
5690        return None;
5691    };
5692    let (col_pos, value) = resolve_col_literal_pair(lhs, rhs, schema_cols, table_alias)
5693        .or_else(|| resolve_col_literal_pair(rhs, lhs, schema_cols, table_alias))?;
5694    let idx = table.index_on(col_pos)?;
5695    let key = IndexKey::from_value(&value)?;
5696    let locators = idx.lookup_eq(&key);
5697    let table_name = table.schema().name.as_str();
5698    // v5.1: each locator dispatches to either the hot tier (zero-
5699    // copy borrow of `table.rows()[i]`) or a cold-tier segment
5700    // (one page read + dense row decode, ~µs scale). Cold rows are
5701    // returned as `Cow::Owned` so the caller's `&Row` iteration
5702    // doesn't see a tier distinction; pre-freezer (no cold
5703    // segments loaded) every locator is `Hot` and every entry is
5704    // `Cow::Borrowed` — identical cost to the pre-v5.1 path.
5705    let mut out: Vec<Cow<'a, Row>> = Vec::with_capacity(locators.len());
5706    for loc in locators {
5707        match *loc {
5708            spg_storage::RowLocator::Hot(i) => {
5709                if let Some(row) = table.rows().get(i) {
5710                    out.push(Cow::Borrowed(row));
5711                }
5712            }
5713            spg_storage::RowLocator::Cold { segment_id, .. } => {
5714                if let Some(row) = catalog.resolve_cold_locator(table_name, segment_id, &key) {
5715                    out.push(Cow::Owned(row));
5716                }
5717            }
5718        }
5719    }
5720    Some(out)
5721}
5722
5723/// v7.12.3 — GIN-accelerated candidate seek for `WHERE col @@ <ts_query>`.
5724///
5725/// Recurses through top-level `AND` like [`try_index_seek`] so a
5726/// composite predicate `WHERE search_vector @@ q AND id > $1` still
5727/// hits the GIN index on `search_vector` — the caller re-applies the
5728/// full WHERE expression to each returned candidate, so dropping the
5729/// `id > $1` residual here stays semantically correct.
5730///
5731/// Returns `None` when:
5732///   - no leaf is a `col @@ <rhs>` shape on a GIN-indexed column;
5733///   - the RHS can't be const-evaluated to a `Value::TsQuery`
5734///     (typically because it references row columns);
5735///   - the resolved `TsQuery` uses query shapes the MVP doesn't
5736///     accelerate (`Not`, `Phrase` — those fall through to full scan).
5737///
5738/// On `Some(rows)` the caller iterates only `rows` and re-evaluates
5739/// the full `@@` predicate per row, so an over-approximate candidate
5740/// set is safe.
5741fn try_gin_seek<'a>(
5742    where_expr: &Expr,
5743    schema_cols: &[ColumnSchema],
5744    catalog: &'a Catalog,
5745    table: &'a Table,
5746    table_alias: &str,
5747    ctx: &eval::EvalContext<'_>,
5748) -> Option<Vec<Cow<'a, Row>>> {
5749    if let Expr::Binary {
5750        lhs,
5751        op: BinOp::And,
5752        rhs,
5753    } = where_expr
5754    {
5755        if let Some(rows) = try_gin_seek(lhs, schema_cols, catalog, table, table_alias, ctx) {
5756            return Some(rows);
5757        }
5758        return try_gin_seek(rhs, schema_cols, catalog, table, table_alias, ctx);
5759    }
5760    let Expr::Binary {
5761        lhs,
5762        op: BinOp::TsMatch,
5763        rhs,
5764    } = where_expr
5765    else {
5766        return None;
5767    };
5768    // Either side can be the column; pgvector idiom (`vec @@ q`)
5769    // hits the first arm, FROM-clause-derived (`plainto_tsquery($1)
5770    // q ... WHERE search_vector @@ q`) the same. CROSS JOIN derived
5771    // tables resolve `q` to a Column too.
5772    let (col_pos, query) = resolve_gin_col_query(lhs, rhs, schema_cols, table_alias, ctx)
5773        .or_else(|| resolve_gin_col_query(rhs, lhs, schema_cols, table_alias, ctx))?;
5774    let idx = table
5775        .indices()
5776        .iter()
5777        .find(|i| i.column_position == col_pos && i.is_gin())?;
5778    let candidates = gin_query_candidates(idx, &query)?;
5779    let _ = catalog; // cold-tier row resolution unused in MVP; see below.
5780    let mut out: Vec<Cow<'a, Row>> = Vec::with_capacity(candidates.len());
5781    for loc in candidates {
5782        match loc {
5783            spg_storage::RowLocator::Hot(i) => {
5784                if let Some(row) = table.rows().get(i) {
5785                    out.push(Cow::Borrowed(row));
5786                }
5787            }
5788            // GIN cold-tier rows in the MVP: skipped, matching the
5789            // full-scan `@@` path which itself only iterates
5790            // `table.rows()` (hot tier). When v7.13+ adds cold-tier
5791            // scan-time materialisation for `@@`, the parallel
5792            // resolution lands here; until then both paths see the
5793            // same hot-only candidate set so correctness is preserved.
5794            spg_storage::RowLocator::Cold { .. } => {}
5795        }
5796    }
5797    Some(out)
5798}
5799
5800/// v7.12.3 — extract `(column_position, TsQueryAst)` when one side of
5801/// the binary is a column reference to a GIN-indexed tsvector column
5802/// and the other side const-evaluates to a `Value::TsQuery`. Returns
5803/// `None` if the column reference is for the wrong table alias, or if
5804/// the RHS expression depends on row data.
5805fn resolve_gin_col_query(
5806    col_side: &Expr,
5807    query_side: &Expr,
5808    schema_cols: &[ColumnSchema],
5809    table_alias: &str,
5810    ctx: &eval::EvalContext<'_>,
5811) -> Option<(usize, spg_storage::TsQueryAst)> {
5812    let Expr::Column(c) = col_side else {
5813        return None;
5814    };
5815    if let Some(q) = &c.qualifier
5816        && q != table_alias
5817    {
5818        return None;
5819    }
5820    let pos = schema_cols.iter().position(|s| s.name == c.name)?;
5821    // Const-evaluate the query side with an empty row — fails fast
5822    // (with a `ColumnNotFound` / similar) if the expression actually
5823    // depends on row data, which is exactly the bail signal we want.
5824    let empty_row = Row::new(Vec::new());
5825    let v = eval::eval_expr(query_side, &empty_row, ctx).ok()?;
5826    let Value::TsQuery(q) = v else { return None };
5827    Some((pos, q))
5828}
5829
5830/// v7.12.3 — walk a `TsQueryAst` against an [`IndexKind::Gin`] index
5831/// to produce a candidate row-locator set. Returns `None` for query
5832/// shapes the MVP doesn't accelerate (`Not` / `Phrase` — both bail to
5833/// full scan since their semantics need either complementation across
5834/// the whole row set or positional verification beyond what the
5835/// posting list carries).
5836///
5837/// Candidate sets are over-approximate — the caller re-applies the
5838/// full `@@` predicate per row, so reporting "row was in some
5839/// posting list" without verifying positions / weights stays correct.
5840fn gin_query_candidates(
5841    idx: &spg_storage::Index,
5842    query: &spg_storage::TsQueryAst,
5843) -> Option<Vec<spg_storage::RowLocator>> {
5844    use spg_storage::TsQueryAst;
5845    match query {
5846        TsQueryAst::Term { word, .. } => {
5847            let mut v: Vec<spg_storage::RowLocator> = idx.gin_lookup_word(word).to_vec();
5848            v.sort_by_key(locator_sort_key);
5849            v.dedup_by_key(|l| locator_sort_key(l));
5850            Some(v)
5851        }
5852        TsQueryAst::And(l, r) => {
5853            let mut left = gin_query_candidates(idx, l)?;
5854            let mut right = gin_query_candidates(idx, r)?;
5855            left.sort_by_key(locator_sort_key);
5856            right.sort_by_key(locator_sort_key);
5857            // Sorted-merge intersection.
5858            let mut out: Vec<spg_storage::RowLocator> = Vec::new();
5859            let (mut i, mut j) = (0usize, 0usize);
5860            while i < left.len() && j < right.len() {
5861                let lk = locator_sort_key(&left[i]);
5862                let rk = locator_sort_key(&right[j]);
5863                match lk.cmp(&rk) {
5864                    core::cmp::Ordering::Less => i += 1,
5865                    core::cmp::Ordering::Greater => j += 1,
5866                    core::cmp::Ordering::Equal => {
5867                        out.push(left[i]);
5868                        i += 1;
5869                        j += 1;
5870                    }
5871                }
5872            }
5873            Some(out)
5874        }
5875        TsQueryAst::Or(l, r) => {
5876            let mut out = gin_query_candidates(idx, l)?;
5877            out.extend(gin_query_candidates(idx, r)?);
5878            out.sort_by_key(locator_sort_key);
5879            out.dedup_by_key(|l| locator_sort_key(l));
5880            Some(out)
5881        }
5882        // Not / Phrase bail to full scan in the MVP. Not needs
5883        // complementation against the whole row set (not represented
5884        // in the posting-list view); Phrase needs positional
5885        // verification beyond what `word → rows` carries.
5886        TsQueryAst::Not(_) | TsQueryAst::Phrase { .. } => None,
5887    }
5888}
5889
5890/// v7.12.3 — total ordering on `RowLocator` for sort/dedup purposes
5891/// inside the GIN intersection / union loops. Hot rows order by their
5892/// row index; Cold rows order after all Hot rows, then by
5893/// `(segment_id, the cold sub-key)`.
5894fn locator_sort_key(l: &spg_storage::RowLocator) -> (u8, u64, u64) {
5895    match *l {
5896        spg_storage::RowLocator::Hot(i) => (0, i as u64, 0),
5897        spg_storage::RowLocator::Cold {
5898            segment_id,
5899            page_offset,
5900        } => (1, u64::from(segment_id), u64::from(page_offset)),
5901    }
5902}
5903
5904/// v5.2.3: extract `(column_position, IndexKey)` when `where_expr`
5905/// is a simple `col = literal` predicate suitable for a `BTree` index
5906/// seek. Used by `exec_update_cancel` / `exec_delete_cancel` to
5907/// decide whether a write touches a cold-tier row (which requires
5908/// promote-on-write / shadow-on-delete) before falling through to
5909/// the hot-tier row walk.
5910///
5911/// Returns `None` for any predicate shape the planner can't push
5912/// down to an index seek — complex WHERE clauses always take the
5913/// hot-only path (cold rows are immutable to non-indexed writes
5914/// until a future scan-fanout sub-version).
5915fn try_pk_predicate(
5916    where_expr: &Expr,
5917    schema_cols: &[ColumnSchema],
5918    table_alias: &str,
5919) -> Option<(usize, IndexKey)> {
5920    let Expr::Binary {
5921        lhs,
5922        op: BinOp::Eq,
5923        rhs,
5924    } = where_expr
5925    else {
5926        return None;
5927    };
5928    let (col_pos, value) = resolve_col_literal_pair(lhs, rhs, schema_cols, table_alias)
5929        .or_else(|| resolve_col_literal_pair(rhs, lhs, schema_cols, table_alias))?;
5930    let key = IndexKey::from_value(&value)?;
5931    Some((col_pos, key))
5932}
5933
5934fn resolve_col_literal_pair(
5935    col_side: &Expr,
5936    lit_side: &Expr,
5937    schema_cols: &[ColumnSchema],
5938    table_alias: &str,
5939) -> Option<(usize, Value)> {
5940    let Expr::Column(c) = col_side else {
5941        return None;
5942    };
5943    if let Some(q) = &c.qualifier
5944        && q != table_alias
5945    {
5946        return None;
5947    }
5948    let pos = schema_cols.iter().position(|s| s.name == c.name)?;
5949    let Expr::Literal(l) = lit_side else {
5950        return None;
5951    };
5952    let v = match l {
5953        Literal::Integer(n) => {
5954            if let Ok(small) = i32::try_from(*n) {
5955                Value::Int(small)
5956            } else {
5957                Value::BigInt(*n)
5958            }
5959        }
5960        Literal::Float(x) => Value::Float(*x),
5961        Literal::String(s) => Value::Text(s.clone()),
5962        Literal::Bool(b) => Value::Bool(*b),
5963        Literal::Null => Value::Null,
5964        // Vector and Interval literals can't be used as B-tree index keys.
5965        // Tell the planner to fall back to full-scan.
5966        Literal::Vector(_) | Literal::Interval { .. } => return None,
5967    };
5968    Some((pos, v))
5969}
5970
5971/// Find the schema entry that a SELECT-list `Expr::Column` refers to.
5972/// Mirrors `resolve_column` in `eval.rs`, but returns a proper
5973/// `EngineError` so the projection-build path keeps `UnknownQualifier`
5974/// vs `ColumnNotFound` distinct.
5975fn resolve_projection_column<'a>(
5976    c: &ColumnName,
5977    schema_cols: &'a [ColumnSchema],
5978    table_alias: &str,
5979) -> Result<&'a ColumnSchema, EngineError> {
5980    if let Some(q) = &c.qualifier {
5981        let composite = alloc::format!("{q}.{name}", name = c.name);
5982        if let Some(s) = schema_cols.iter().find(|s| s.name == composite) {
5983            return Ok(s);
5984        }
5985        // Single-table case: the qualifier may equal the active alias —
5986        // then look for the bare column name.
5987        if q == table_alias
5988            && let Some(s) = schema_cols.iter().find(|s| s.name == c.name)
5989        {
5990            return Ok(s);
5991        }
5992        // For multi-table schemas the qualifier is unknown only if no
5993        // column bears the "<q>." prefix. For single-table, the alias
5994        // mismatch alone is enough.
5995        let prefix = alloc::format!("{q}.");
5996        let qualifier_known =
5997            q == table_alias || schema_cols.iter().any(|s| s.name.starts_with(&prefix));
5998        if !qualifier_known {
5999            return Err(EngineError::Eval(EvalError::UnknownQualifier {
6000                qualifier: q.clone(),
6001            }));
6002        }
6003        return Err(EngineError::Eval(EvalError::ColumnNotFound {
6004            name: c.name.clone(),
6005        }));
6006    }
6007    if let Some(s) = schema_cols.iter().find(|s| s.name == c.name) {
6008        return Ok(s);
6009    }
6010    let suffix = alloc::format!(".{name}", name = c.name);
6011    let mut matches = schema_cols.iter().filter(|s| s.name.ends_with(&suffix));
6012    let first = matches.next();
6013    let extra = matches.next();
6014    match (first, extra) {
6015        (Some(s), None) => Ok(s),
6016        (Some(_), Some(_)) => Err(EngineError::Eval(EvalError::TypeMismatch {
6017            detail: alloc::format!("ambiguous column reference: {}", c.name),
6018        })),
6019        _ => Err(EngineError::Eval(EvalError::ColumnNotFound {
6020            name: c.name.clone(),
6021        })),
6022    }
6023}
6024
6025fn build_projection(
6026    items: &[SelectItem],
6027    schema_cols: &[ColumnSchema],
6028    table_alias: &str,
6029) -> Result<Vec<ProjectedItem>, EngineError> {
6030    let mut out = Vec::new();
6031    for item in items {
6032        match item {
6033            SelectItem::Wildcard => {
6034                for col in schema_cols {
6035                    out.push(ProjectedItem {
6036                        expr: Expr::Column(ColumnName {
6037                            qualifier: None,
6038                            name: col.name.clone(),
6039                        }),
6040                        output_name: col.name.clone(),
6041                        ty: col.ty,
6042                        nullable: col.nullable,
6043                    });
6044                }
6045            }
6046            SelectItem::Expr { expr, alias } => {
6047                // Plain column ref keeps full schema info (real type +
6048                // nullability). Compound expressions evaluate fine but have
6049                // no static type — surface them as nullable TEXT, which is
6050                // what most clients render anyway.
6051                if let Expr::Column(c) = expr {
6052                    let sch = resolve_projection_column(c, schema_cols, table_alias)?;
6053                    let output_name = alias.clone().unwrap_or_else(|| c.name.clone());
6054                    out.push(ProjectedItem {
6055                        expr: expr.clone(),
6056                        output_name,
6057                        ty: sch.ty,
6058                        nullable: sch.nullable,
6059                    });
6060                } else {
6061                    let output_name = alias.clone().unwrap_or_else(|| expr.to_string());
6062                    out.push(ProjectedItem {
6063                        expr: expr.clone(),
6064                        output_name,
6065                        ty: DataType::Text,
6066                        nullable: true,
6067                    });
6068                }
6069            }
6070        }
6071    }
6072    Ok(out)
6073}
6074
6075/// Promote an integer to a NUMERIC value at the requested scale.
6076/// Rejects values that, after scaling, would overflow the column's
6077/// precision budget.
6078fn numeric_from_integer(
6079    n: i128,
6080    precision: u8,
6081    scale: u8,
6082    col_name: &str,
6083) -> Result<Value, EngineError> {
6084    let factor = pow10_i128(scale);
6085    let scaled = n.checked_mul(factor).ok_or_else(|| {
6086        EngineError::Unsupported(alloc::format!(
6087            "integer overflow scaling value for column `{col_name}` to scale {scale}"
6088        ))
6089    })?;
6090    check_precision(scaled, precision, col_name)?;
6091    Ok(Value::Numeric { scaled, scale })
6092}
6093
6094/// Float → NUMERIC. Uses round-half-away-from-zero on `x * 10^scale`,
6095/// then verifies the result fits the column's precision.
6096#[allow(clippy::cast_precision_loss, clippy::cast_possible_truncation)]
6097fn numeric_from_float(
6098    x: f64,
6099    precision: u8,
6100    scale: u8,
6101    col_name: &str,
6102) -> Result<Value, EngineError> {
6103    if !x.is_finite() {
6104        return Err(EngineError::Unsupported(alloc::format!(
6105            "cannot store non-finite float in NUMERIC column `{col_name}`"
6106        )));
6107    }
6108    let mut factor = 1.0_f64;
6109    for _ in 0..scale {
6110        factor *= 10.0;
6111    }
6112    // Round half-away-from-zero by biasing then casting (`as i128`
6113    // truncates toward zero, so the bias + truncation gives the
6114    // desired rounding). `f64::floor` / `ceil` live in std; we don't
6115    // need them — the cast handles the truncation step.
6116    let shifted = x * factor;
6117    let biased = if shifted >= 0.0 {
6118        shifted + 0.5
6119    } else {
6120        shifted - 0.5
6121    };
6122    // Range-check before casting back to i128 — the cast itself is
6123    // saturating in Rust, which would silently truncate huge inputs.
6124    if !(-1e38..=1e38).contains(&biased) {
6125        return Err(EngineError::Unsupported(alloc::format!(
6126            "value {x} overflows NUMERIC range for column `{col_name}`"
6127        )));
6128    }
6129    let scaled = biased as i128;
6130    check_precision(scaled, precision, col_name)?;
6131    Ok(Value::Numeric { scaled, scale })
6132}
6133
6134/// Move a Numeric value from `src_scale` to `dst_scale`. Going up
6135/// multiplies by 10; going down rounds half-away-from-zero.
6136fn numeric_rescale(
6137    scaled: i128,
6138    src_scale: u8,
6139    precision: u8,
6140    dst_scale: u8,
6141    col_name: &str,
6142) -> Result<Value, EngineError> {
6143    let new_scaled = if dst_scale >= src_scale {
6144        let bump = pow10_i128(dst_scale - src_scale);
6145        scaled.checked_mul(bump).ok_or_else(|| {
6146            EngineError::Unsupported(alloc::format!(
6147                "overflow rescaling NUMERIC for column `{col_name}`"
6148            ))
6149        })?
6150    } else {
6151        let drop = pow10_i128(src_scale - dst_scale);
6152        let half = drop / 2;
6153        if scaled >= 0 {
6154            (scaled + half) / drop
6155        } else {
6156            (scaled - half) / drop
6157        }
6158    };
6159    check_precision(new_scaled, precision, col_name)?;
6160    Ok(Value::Numeric {
6161        scaled: new_scaled,
6162        scale: dst_scale,
6163    })
6164}
6165
6166/// Drop the fractional part of a scaled integer, returning the integer
6167/// portion (toward zero). Used for NUMERIC → INT casts.
6168const fn numeric_truncate_to_integer(scaled: i128, scale: u8) -> i128 {
6169    if scale == 0 {
6170        return scaled;
6171    }
6172    let factor = pow10_i128_const(scale);
6173    scaled / factor
6174}
6175
6176/// Verify a scaled NUMERIC value fits the column's declared precision.
6177/// `precision == 0` is the "unconstrained" form (bare `NUMERIC`); we
6178/// skip the check there.
6179fn check_precision(scaled: i128, precision: u8, col_name: &str) -> Result<(), EngineError> {
6180    if precision == 0 {
6181        return Ok(());
6182    }
6183    let limit = pow10_i128(precision);
6184    if scaled.unsigned_abs() >= limit.unsigned_abs() {
6185        return Err(EngineError::Unsupported(alloc::format!(
6186            "NUMERIC value exceeds precision {precision} for column `{col_name}`"
6187        )));
6188    }
6189    Ok(())
6190}
6191
/// `10^p` as an `i128`, callable from `const` contexts.
///
/// Overflow handling follows ordinary integer arithmetic for the build
/// profile; `10^38` is the largest power that fits.
const fn pow10_i128_const(p: u8) -> i128 {
    // `as` is a lossless u8 → u32 widening; `u32::from` is not callable
    // in a `const fn`.
    10_i128.pow(p as u32)
}
6201
/// `10^p` as an `i128` (runtime counterpart of `pow10_i128_const`).
fn pow10_i128(p: u8) -> i128 {
    10_i128.pow(u32::from(p))
}
6205
/// Walk a parsed `Statement`, swapping any `NOW()` /
/// `CURRENT_TIMESTAMP()` / `CURRENT_DATE()` function calls for a
/// literal cast that wraps the engine's per-statement clock reading.
/// When `now_micros` is `None`, calls stay as-is and surface as
/// `unknown function` at eval time, which keeps the error path explicit.
///
/// v4.10: pre-walk the WHERE / projection / etc. of a SELECT and
/// replace every subquery node with a materialised literal. SPG
/// only supports uncorrelated subqueries: the inner SELECT does
/// not see outer-row columns, so the result is the same for every
/// outer row and can be evaluated once.
///
/// Returns the rewritten statement; the caller passes this to the
/// regular row-loop executor, which no longer sees Subquery nodes
/// in its tree.
6220impl Engine {
6221    /// v4.12 window executor. Implements `ROW_NUMBER` / `RANK` /
6222    /// `DENSE_RANK` and the partition-aware aggregates `SUM` /
6223    /// `AVG` / `COUNT` / `MIN` / `MAX`. The plan is:
6224    /// 1. Apply the WHERE filter.
6225    /// 2. For each unique `WindowFunction` node in the projection,
6226    ///    partition + sort, compute the per-row value.
6227    /// 3. Append the window values as synthetic columns (`__win_N`)
6228    ///    to the row schema.
6229    /// 4. Rewrite the projection to read those columns.
6230    /// 5. Hand off to the regular project / ORDER BY / LIMIT pipe.
6231    #[allow(
6232        clippy::too_many_lines,
6233        clippy::type_complexity,
6234        clippy::needless_range_loop
6235    )] // window-eval is one cohesive pipe; splitting fragments
6236    fn exec_select_with_window(
6237        &self,
6238        stmt: &SelectStatement,
6239        cancel: CancelToken<'_>,
6240    ) -> Result<QueryResult, EngineError> {
6241        let from = stmt.from.as_ref().ok_or_else(|| {
6242            EngineError::Unsupported("window functions require a FROM clause".into())
6243        })?;
6244        // For v4.12 we only support a single-table FROM. Joins +
6245        // windows is queued for v5.x.
6246        if !from.joins.is_empty() {
6247            return Err(EngineError::Unsupported(
6248                "JOIN with window functions not yet supported".into(),
6249            ));
6250        }
6251        let primary = &from.primary;
6252        let table = self.active_catalog().get(&primary.name).ok_or_else(|| {
6253            StorageError::TableNotFound {
6254                name: primary.name.clone(),
6255            }
6256        })?;
6257        let alias = primary.alias.as_deref().unwrap_or(primary.name.as_str());
6258        let schema_cols = &table.schema().columns;
6259        let ctx = self.ev_ctx(schema_cols, Some(alias));
6260
6261        // 1) Filter pass.
6262        let mut filtered: Vec<&Row> = Vec::new();
6263        for (i, row) in table.rows().iter().enumerate() {
6264            if i.is_multiple_of(256) {
6265                cancel.check()?;
6266            }
6267            if let Some(w) = &stmt.where_ {
6268                let cond = eval::eval_expr(w, row, &ctx)?;
6269                if !matches!(cond, Value::Bool(true)) {
6270                    continue;
6271                }
6272            }
6273            filtered.push(row);
6274        }
6275        let n_rows = filtered.len();
6276
6277        // 2) Collect unique window function nodes from projection.
6278        let mut window_nodes: Vec<Expr> = Vec::new();
6279        for item in &stmt.items {
6280            if let SelectItem::Expr { expr, .. } = item {
6281                collect_window_nodes(expr, &mut window_nodes);
6282            }
6283        }
6284
6285        // 3) For each window, compute per-row value.
6286        // Index: same order as window_nodes; for row i, win_vals[w][i].
6287        let mut win_vals: Vec<Vec<Value>> = Vec::with_capacity(window_nodes.len());
6288        for wnode in &window_nodes {
6289            let Expr::WindowFunction {
6290                name,
6291                args,
6292                partition_by,
6293                order_by,
6294                frame,
6295                null_treatment,
6296            } = wnode
6297            else {
6298                unreachable!("collect_window_nodes pushes only WindowFunction");
6299            };
6300            // Compute (partition_key, order_key, original_index) for each row.
6301            let mut indexed: Vec<(Vec<Value>, Vec<(Value, bool)>, usize)> =
6302                Vec::with_capacity(n_rows);
6303            for (i, row) in filtered.iter().enumerate() {
6304                let pkey: Vec<Value> = partition_by
6305                    .iter()
6306                    .map(|p| eval::eval_expr(p, row, &ctx))
6307                    .collect::<Result<_, _>>()?;
6308                let okey: Vec<(Value, bool)> = order_by
6309                    .iter()
6310                    .map(|(e, desc)| eval::eval_expr(e, row, &ctx).map(|v| (v, *desc)))
6311                    .collect::<Result<_, _>>()?;
6312                indexed.push((pkey, okey, i));
6313            }
6314            // Sort by (partition_key, order_key). Partition key uses
6315            // a stable encoded form; order key respects ASC/DESC.
6316            indexed.sort_by(|a, b| {
6317                let p_cmp = partition_key_cmp(&a.0, &b.0);
6318                if p_cmp != core::cmp::Ordering::Equal {
6319                    return p_cmp;
6320                }
6321                order_key_cmp(&a.1, &b.1)
6322            });
6323            // Per-partition compute.
6324            let mut out_vals: Vec<Value> = alloc::vec![Value::Null; n_rows];
6325            let mut p_start = 0;
6326            while p_start < indexed.len() {
6327                let mut p_end = p_start + 1;
6328                while p_end < indexed.len()
6329                    && partition_key_cmp(&indexed[p_start].0, &indexed[p_end].0)
6330                        == core::cmp::Ordering::Equal
6331                {
6332                    p_end += 1;
6333                }
6334                // Compute the function within this partition slice.
6335                compute_window_partition(
6336                    name,
6337                    args,
6338                    !order_by.is_empty(),
6339                    frame.as_ref(),
6340                    *null_treatment,
6341                    &indexed[p_start..p_end],
6342                    &filtered,
6343                    &ctx,
6344                    &mut out_vals,
6345                )?;
6346                p_start = p_end;
6347            }
6348            win_vals.push(out_vals);
6349        }
6350
6351        // 4) Build extended schema: original columns + synthetic.
6352        let mut ext_cols = schema_cols.clone();
6353        for i in 0..window_nodes.len() {
6354            ext_cols.push(ColumnSchema::new(
6355                alloc::format!("__win_{i}"),
6356                DataType::Text, // type doesn't matter for projection eval
6357                true,
6358            ));
6359        }
6360        // 5) Build extended rows: each row gets its window values appended.
6361        let mut ext_rows: Vec<Row> = Vec::with_capacity(n_rows);
6362        for i in 0..n_rows {
6363            let mut values = filtered[i].values.clone();
6364            for w in 0..window_nodes.len() {
6365                values.push(win_vals[w][i].clone());
6366            }
6367            ext_rows.push(Row::new(values));
6368        }
6369        // 6) Rewrite the projection: WindowFunction nodes → Column(__win_N).
6370        let mut rewritten_items: Vec<SelectItem> = Vec::with_capacity(stmt.items.len());
6371        for item in &stmt.items {
6372            let new_item = match item {
6373                SelectItem::Wildcard => SelectItem::Wildcard,
6374                SelectItem::Expr { expr, alias } => {
6375                    let mut e = expr.clone();
6376                    rewrite_window_to_columns(&mut e, &window_nodes);
6377                    SelectItem::Expr {
6378                        expr: e,
6379                        alias: alias.clone(),
6380                    }
6381                }
6382            };
6383            rewritten_items.push(new_item);
6384        }
6385
6386        // 7) Project into final rows.
6387        let ext_ctx = EvalContext::new(&ext_cols, Some(alias));
6388        let projection = build_projection(&rewritten_items, &ext_cols, alias)?;
6389        let mut tagged: Vec<(Vec<f64>, Row)> = Vec::with_capacity(n_rows);
6390        for (i, row) in ext_rows.iter().enumerate() {
6391            if i.is_multiple_of(256) {
6392                cancel.check()?;
6393            }
6394            let mut values = Vec::with_capacity(projection.len());
6395            for p in &projection {
6396                values.push(eval::eval_expr(&p.expr, row, &ext_ctx)?);
6397            }
6398            let order_keys = if stmt.order_by.is_empty() {
6399                Vec::new()
6400            } else {
6401                let mut keys = Vec::with_capacity(stmt.order_by.len());
6402                for o in &stmt.order_by {
6403                    let mut e = o.expr.clone();
6404                    rewrite_window_to_columns(&mut e, &window_nodes);
6405                    let key = eval::eval_expr(&e, row, &ext_ctx)?;
6406                    keys.push(value_to_order_key(&key)?);
6407                }
6408                keys
6409            };
6410            tagged.push((order_keys, Row::new(values)));
6411        }
6412        // ORDER BY + LIMIT/OFFSET on the projected rows.
6413        if !stmt.order_by.is_empty() {
6414            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
6415            sort_by_keys(&mut tagged, &descs);
6416        }
6417        let mut out_rows: Vec<Row> = tagged.into_iter().map(|(_, r)| r).collect();
6418        apply_offset_and_limit(&mut out_rows, stmt.offset_literal(), stmt.limit_literal());
6419        let final_cols: Vec<ColumnSchema> = projection
6420            .into_iter()
6421            .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
6422            .collect();
6423        Ok(QueryResult::Rows {
6424            columns: final_cols,
6425            rows: out_rows,
6426        })
6427    }
6428
6429    /// v4.11: materialise each CTE into a temp table inside a
6430    /// cloned catalog, then run the body SELECT against a fresh
6431    /// engine instance that owns the enriched catalog. The clone
6432    /// is moderately expensive — only paid by CTE-bearing queries.
6433    /// Subqueries inside CTE bodies / the main body resolve as
6434    /// usual; `clock_fn` is propagated so `NOW()` lines up.
6435    fn exec_with_ctes(
6436        &self,
6437        stmt: &SelectStatement,
6438        cancel: CancelToken<'_>,
6439    ) -> Result<QueryResult, EngineError> {
6440        cancel.check()?;
6441        let mut catalog = self.active_catalog().clone();
6442        for cte in &stmt.ctes {
6443            if catalog.get(&cte.name).is_some() {
6444                return Err(EngineError::Unsupported(alloc::format!(
6445                    "CTE name {:?} shadows an existing table; rename the CTE",
6446                    cte.name
6447                )));
6448            }
6449            let (columns, rows) = if cte.recursive {
6450                self.materialise_recursive_cte(cte, &catalog, cancel)?
6451            } else {
6452                let body_result = self.exec_select_cancel(&cte.body, cancel)?;
6453                let QueryResult::Rows { columns, rows } = body_result else {
6454                    return Err(EngineError::Unsupported(alloc::format!(
6455                        "CTE {:?} body did not return rows",
6456                        cte.name
6457                    )));
6458                };
6459                (columns, rows)
6460            };
6461            // v4.22: the projection builder labels any non-column
6462            // expression as Text — including literal SELECT 1.
6463            // Promote each column's type to whatever the rows
6464            // actually carry so the CTE storage table accepts them.
6465            let inferred = infer_column_types(&columns, &rows);
6466            let mut columns = inferred;
6467            // v4.22: apply optional `WITH name(a, b, c)` overrides.
6468            if !cte.column_overrides.is_empty() {
6469                if cte.column_overrides.len() != columns.len() {
6470                    return Err(EngineError::Unsupported(alloc::format!(
6471                        "CTE {:?} column list has {} names but body returns {} columns",
6472                        cte.name,
6473                        cte.column_overrides.len(),
6474                        columns.len()
6475                    )));
6476                }
6477                for (col, name) in columns.iter_mut().zip(cte.column_overrides.iter()) {
6478                    col.name.clone_from(name);
6479                }
6480            }
6481            let schema = TableSchema::new(cte.name.clone(), columns);
6482            catalog.create_table(schema).map_err(EngineError::Storage)?;
6483            let table = catalog
6484                .get_mut(&cte.name)
6485                .expect("just-created CTE table must exist");
6486            for row in rows {
6487                table.insert(row).map_err(EngineError::Storage)?;
6488            }
6489        }
6490        // Strip CTEs from the body before running on the temp engine
6491        // so we don't recurse forever.
6492        let mut body = stmt.clone();
6493        body.ctes = Vec::new();
6494        let mut temp = Engine::restore(catalog);
6495        if let Some(c) = self.clock {
6496            temp = temp.with_clock(c);
6497        }
6498        if let Some(f) = self.salt_fn {
6499            temp = temp.with_salt_fn(f);
6500        }
6501        temp.exec_select_cancel(&body, cancel)
6502    }
6503
6504    /// v4.22: materialise a WITH RECURSIVE CTE. The body must be a
6505    /// UNION (or UNION ALL) of an anchor that does not reference
6506    /// the CTE name, and one or more recursive terms that do. The
6507    /// anchor runs first; each subsequent iteration runs the
6508    /// recursive term against a temp catalog where the CTE name is
6509    /// bound to the *previous* iteration's output. Iteration stops
6510    /// when the recursive term yields no rows; UNION (DISTINCT)
6511    /// deduplicates against the accumulated result, UNION ALL does
6512    /// not. A hard cap on total rows prevents runaway queries.
6513    #[allow(clippy::too_many_lines)]
6514    fn materialise_recursive_cte(
6515        &self,
6516        cte: &spg_sql::ast::Cte,
6517        base_catalog: &Catalog,
6518        cancel: CancelToken<'_>,
6519    ) -> Result<(Vec<ColumnSchema>, Vec<Row>), EngineError> {
6520        const MAX_TOTAL_ROWS: usize = 1_000_000;
6521        const MAX_ITERATIONS: usize = 100_000;
6522        cancel.check()?;
6523        if cte.body.unions.is_empty() {
6524            return Err(EngineError::Unsupported(alloc::format!(
6525                "WITH RECURSIVE {:?} body must be a UNION of an anchor and a recursive term",
6526                cte.name
6527            )));
6528        }
6529        // Anchor: the body's leading SELECT, with unions stripped.
6530        let mut anchor = cte.body.clone();
6531        let union_terms = core::mem::take(&mut anchor.unions);
6532        anchor.ctes = Vec::new();
6533        // Anchor must not reference the CTE name.
6534        if select_refers_to(&anchor, &cte.name) {
6535            return Err(EngineError::Unsupported(alloc::format!(
6536                "WITH RECURSIVE {:?}: the anchor must not reference the CTE itself",
6537                cte.name
6538            )));
6539        }
6540        let anchor_result = self.exec_select_cancel(&anchor, cancel)?;
6541        let QueryResult::Rows {
6542            columns: anchor_cols,
6543            rows: anchor_rows,
6544        } = anchor_result
6545        else {
6546            return Err(EngineError::Unsupported(alloc::format!(
6547                "WITH RECURSIVE {:?}: anchor did not return rows",
6548                cte.name
6549            )));
6550        };
6551        // The projection builder labels non-column expressions Text;
6552        // refine column types from the anchor's actual values so the
6553        // intermediate iter-catalog tables accept them.
6554        let mut columns = infer_column_types(&anchor_cols, &anchor_rows);
6555        if !cte.column_overrides.is_empty() {
6556            if cte.column_overrides.len() != columns.len() {
6557                return Err(EngineError::Unsupported(alloc::format!(
6558                    "CTE {:?} column list has {} names but anchor returns {} columns",
6559                    cte.name,
6560                    cte.column_overrides.len(),
6561                    columns.len()
6562                )));
6563            }
6564            for (col, name) in columns.iter_mut().zip(cte.column_overrides.iter()) {
6565                col.name.clone_from(name);
6566            }
6567        }
6568        let mut all_rows: Vec<Row> = anchor_rows.clone();
6569        let mut working_set: Vec<Row> = anchor_rows;
6570        let mut seen: alloc::collections::BTreeSet<Vec<u8>> = alloc::collections::BTreeSet::new();
6571        // Track at least one "all UNION ALL" flag — if every union
6572        // kind is ALL we skip the dedup step (faster + matches PG).
6573        let all_union_all = union_terms.iter().all(|(k, _)| matches!(k, UnionKind::All));
6574        if !all_union_all {
6575            for r in &all_rows {
6576                seen.insert(encode_row_key(r));
6577            }
6578        }
6579        for iter in 0..MAX_ITERATIONS {
6580            cancel.check()?;
6581            if working_set.is_empty() {
6582                break;
6583            }
6584            // Build a fresh catalog: base + CTE bound to working_set.
6585            let mut iter_catalog = base_catalog.clone();
6586            let schema = TableSchema::new(cte.name.clone(), columns.clone());
6587            iter_catalog
6588                .create_table(schema)
6589                .map_err(EngineError::Storage)?;
6590            {
6591                let table = iter_catalog.get_mut(&cte.name).expect("just-created");
6592                for row in &working_set {
6593                    table.insert(row.clone()).map_err(EngineError::Storage)?;
6594                }
6595            }
6596            let mut iter_engine = Engine::restore(iter_catalog);
6597            if let Some(c) = self.clock {
6598                iter_engine = iter_engine.with_clock(c);
6599            }
6600            if let Some(f) = self.salt_fn {
6601                iter_engine = iter_engine.with_salt_fn(f);
6602            }
6603            // Run each recursive term in sequence and collect new rows.
6604            let mut next_set: Vec<Row> = Vec::new();
6605            for (_, term) in &union_terms {
6606                let mut term = term.clone();
6607                term.ctes = Vec::new();
6608                let r = iter_engine.exec_select_cancel(&term, cancel)?;
6609                let QueryResult::Rows {
6610                    columns: rc,
6611                    rows: rs,
6612                } = r
6613                else {
6614                    return Err(EngineError::Unsupported(alloc::format!(
6615                        "WITH RECURSIVE {:?}: recursive term did not return rows",
6616                        cte.name
6617                    )));
6618                };
6619                if rc.len() != columns.len() {
6620                    return Err(EngineError::Unsupported(alloc::format!(
6621                        "WITH RECURSIVE {:?}: column count of recursive term ({}) does not match anchor ({})",
6622                        cte.name,
6623                        rc.len(),
6624                        columns.len()
6625                    )));
6626                }
6627                for row in rs {
6628                    if !all_union_all {
6629                        let key = encode_row_key(&row);
6630                        if !seen.insert(key) {
6631                            continue;
6632                        }
6633                    }
6634                    next_set.push(row);
6635                }
6636            }
6637            if next_set.is_empty() {
6638                break;
6639            }
6640            all_rows.extend(next_set.iter().cloned());
6641            working_set = next_set;
6642            if all_rows.len() > MAX_TOTAL_ROWS {
6643                return Err(EngineError::Unsupported(alloc::format!(
6644                    "WITH RECURSIVE {:?}: produced more than {MAX_TOTAL_ROWS} rows — likely runaway recursion",
6645                    cte.name
6646                )));
6647            }
6648            if iter + 1 == MAX_ITERATIONS {
6649                return Err(EngineError::Unsupported(alloc::format!(
6650                    "WITH RECURSIVE {:?}: exceeded {MAX_ITERATIONS} iterations",
6651                    cte.name
6652                )));
6653            }
6654        }
6655        Ok((columns, all_rows))
6656    }
6657
6658    fn resolve_select_subqueries(
6659        &self,
6660        stmt: &mut SelectStatement,
6661        cancel: CancelToken<'_>,
6662    ) -> Result<(), EngineError> {
6663        for item in &mut stmt.items {
6664            if let SelectItem::Expr { expr, .. } = item {
6665                self.resolve_expr_subqueries(expr, cancel)?;
6666            }
6667        }
6668        if let Some(w) = &mut stmt.where_ {
6669            self.resolve_expr_subqueries(w, cancel)?;
6670        }
6671        if let Some(gs) = &mut stmt.group_by {
6672            for g in gs {
6673                self.resolve_expr_subqueries(g, cancel)?;
6674            }
6675        }
6676        if let Some(h) = &mut stmt.having {
6677            self.resolve_expr_subqueries(h, cancel)?;
6678        }
6679        for o in &mut stmt.order_by {
6680            self.resolve_expr_subqueries(&mut o.expr, cancel)?;
6681        }
6682        for (_, peer) in &mut stmt.unions {
6683            self.resolve_select_subqueries(peer, cancel)?;
6684        }
6685        Ok(())
6686    }
6687
6688    #[allow(clippy::only_used_in_recursion)] // engine handle reads aren't really pure
6689    fn resolve_expr_subqueries(
6690        &self,
6691        e: &mut Expr,
6692        cancel: CancelToken<'_>,
6693    ) -> Result<(), EngineError> {
6694        // Replace-on-this-node cases first.
6695        if let Some(replacement) = self.subquery_replacement(e, cancel)? {
6696            *e = replacement;
6697            return Ok(());
6698        }
6699        match e {
6700            Expr::Binary { lhs, rhs, .. } => {
6701                self.resolve_expr_subqueries(lhs, cancel)?;
6702                self.resolve_expr_subqueries(rhs, cancel)?;
6703            }
6704            Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
6705                self.resolve_expr_subqueries(expr, cancel)?;
6706            }
6707            Expr::FunctionCall { args, .. } => {
6708                for a in args {
6709                    self.resolve_expr_subqueries(a, cancel)?;
6710                }
6711            }
6712            Expr::Like { expr, pattern, .. } => {
6713                self.resolve_expr_subqueries(expr, cancel)?;
6714                self.resolve_expr_subqueries(pattern, cancel)?;
6715            }
6716            Expr::Extract { source, .. } => self.resolve_expr_subqueries(source, cancel)?,
6717            // v4.12 window functions — recurse into args + ORDER BY
6718            // + PARTITION BY in case they carry inner subqueries.
6719            Expr::WindowFunction {
6720                args,
6721                partition_by,
6722                order_by,
6723                ..
6724            } => {
6725                for a in args {
6726                    self.resolve_expr_subqueries(a, cancel)?;
6727                }
6728                for p in partition_by {
6729                    self.resolve_expr_subqueries(p, cancel)?;
6730                }
6731                for (e, _) in order_by {
6732                    self.resolve_expr_subqueries(e, cancel)?;
6733                }
6734            }
6735            // Subquery nodes are handled in subquery_replacement
6736            // (which returned None — defensive no-op); Literal /
6737            // Column are leaves.
6738            Expr::ScalarSubquery(_)
6739            | Expr::Exists { .. }
6740            | Expr::InSubquery { .. }
6741            | Expr::Literal(_)
6742            | Expr::Placeholder(_)
6743            | Expr::Column(_) => {}
6744            // v7.10.10 — recurse children.
6745            Expr::Array(items) => {
6746                for elem in items {
6747                    self.resolve_expr_subqueries(elem, cancel)?;
6748                }
6749            }
6750            Expr::ArraySubscript { target, index } => {
6751                self.resolve_expr_subqueries(target, cancel)?;
6752                self.resolve_expr_subqueries(index, cancel)?;
6753            }
6754            Expr::AnyAll { expr, array, .. } => {
6755                self.resolve_expr_subqueries(expr, cancel)?;
6756                self.resolve_expr_subqueries(array, cancel)?;
6757            }
6758            Expr::Case {
6759                operand,
6760                branches,
6761                else_branch,
6762            } => {
6763                if let Some(o) = operand {
6764                    self.resolve_expr_subqueries(o, cancel)?;
6765                }
6766                for (w, t) in branches {
6767                    self.resolve_expr_subqueries(w, cancel)?;
6768                    self.resolve_expr_subqueries(t, cancel)?;
6769                }
6770                if let Some(e) = else_branch {
6771                    self.resolve_expr_subqueries(e, cancel)?;
6772                }
6773            }
6774        }
6775        Ok(())
6776    }
6777
6778    /// v4.23: per-row eval that handles correlated subqueries.
6779    /// Equivalent to `eval::eval_expr` when the expression has no
6780    /// subqueries; otherwise clones the expression, substitutes
6781    /// outer-row columns into each surviving subquery node, runs
6782    /// the inner SELECT, and replaces the node with the literal
6783    /// result. Only the WHERE-filter call sites use this path so
6784    /// the uncorrelated fast path is preserved everywhere else.
6785    fn eval_expr_with_correlated(
6786        &self,
6787        expr: &Expr,
6788        row: &Row,
6789        ctx: &EvalContext<'_>,
6790        cancel: CancelToken<'_>,
6791        memo: Option<&mut memoize::MemoizeCache>,
6792    ) -> Result<Value, EngineError> {
6793        if !expr_has_subquery(expr) {
6794            return eval::eval_expr(expr, row, ctx).map_err(EngineError::Eval);
6795        }
6796        let mut e = expr.clone();
6797        self.resolve_correlated_in_expr(&mut e, row, ctx, cancel, memo)?;
6798        eval::eval_expr(&e, row, ctx).map_err(EngineError::Eval)
6799    }
6800
6801    fn resolve_correlated_in_expr(
6802        &self,
6803        e: &mut Expr,
6804        row: &Row,
6805        ctx: &EvalContext<'_>,
6806        cancel: CancelToken<'_>,
6807        mut memo: Option<&mut memoize::MemoizeCache>,
6808    ) -> Result<(), EngineError> {
6809        match e {
6810            Expr::ScalarSubquery(inner) => {
6811                // v6.2.6 — Memoize: build the cache key from the
6812                // pre-substitution subquery repr + the outer row's
6813                // values. Two outer rows with identical correlated
6814                // values hit the same entry.
6815                let cache_key = memo.as_ref().map(|_| memoize::CacheKey {
6816                    subquery_repr: alloc::format!("{}", **inner),
6817                    outer_values: row.values.clone(),
6818                });
6819                if let (Some(cache), Some(k)) = (memo.as_deref_mut(), cache_key.as_ref())
6820                    && let Some(cached) = cache.get(k)
6821                {
6822                    *e = value_to_literal_expr(cached)?;
6823                    return Ok(());
6824                }
6825                let mut s = (**inner).clone();
6826                substitute_outer_columns(&mut s, row, ctx);
6827                let r = self.exec_select_cancel(&s, cancel)?;
6828                let QueryResult::Rows { rows, .. } = r else {
6829                    return Err(EngineError::Unsupported(
6830                        "scalar subquery: inner did not return rows".into(),
6831                    ));
6832                };
6833                let value = match rows.as_slice() {
6834                    [] => Value::Null,
6835                    [r0] => r0.values.first().cloned().unwrap_or(Value::Null),
6836                    _ => {
6837                        return Err(EngineError::Unsupported(alloc::format!(
6838                            "scalar subquery returned {} rows; expected 0 or 1",
6839                            rows.len()
6840                        )));
6841                    }
6842                };
6843                if let (Some(cache), Some(k)) = (memo.as_deref_mut(), cache_key) {
6844                    cache.insert(k, value.clone());
6845                }
6846                *e = value_to_literal_expr(value)?;
6847            }
6848            Expr::Exists { subquery, negated } => {
6849                let mut s = (**subquery).clone();
6850                substitute_outer_columns(&mut s, row, ctx);
6851                let r = self.exec_select_cancel(&s, cancel)?;
6852                let exists = matches!(r, QueryResult::Rows { rows, .. } if !rows.is_empty());
6853                let bit = if *negated { !exists } else { exists };
6854                *e = Expr::Literal(Literal::Bool(bit));
6855            }
6856            Expr::InSubquery {
6857                expr: lhs,
6858                subquery,
6859                negated,
6860            } => {
6861                self.resolve_correlated_in_expr(lhs, row, ctx, cancel, memo.as_deref_mut())?;
6862                let lhs_val = eval::eval_expr(lhs, row, ctx).map_err(EngineError::Eval)?;
6863                let mut s = (**subquery).clone();
6864                substitute_outer_columns(&mut s, row, ctx);
6865                let r = self.exec_select_cancel(&s, cancel)?;
6866                let QueryResult::Rows { columns, rows, .. } = r else {
6867                    return Err(EngineError::Unsupported(
6868                        "IN-subquery: inner did not return rows".into(),
6869                    ));
6870                };
6871                if columns.len() != 1 {
6872                    return Err(EngineError::Unsupported(alloc::format!(
6873                        "IN-subquery must project exactly one column; got {}",
6874                        columns.len()
6875                    )));
6876                }
6877                let mut found = false;
6878                let mut any_null = false;
6879                for r0 in rows {
6880                    let v = r0.values.into_iter().next().unwrap_or(Value::Null);
6881                    if v.is_null() {
6882                        any_null = true;
6883                        continue;
6884                    }
6885                    if value_cmp(&v, &lhs_val) == core::cmp::Ordering::Equal {
6886                        found = true;
6887                        break;
6888                    }
6889                }
6890                let bit = if found {
6891                    !*negated
6892                } else if any_null {
6893                    return Err(EngineError::Unsupported(
6894                        "IN-subquery with NULL in result and no match: NULL semantics not yet implemented".into(),
6895                    ));
6896                } else {
6897                    *negated
6898                };
6899                *e = Expr::Literal(Literal::Bool(bit));
6900            }
6901            Expr::Binary { lhs, rhs, .. } => {
6902                self.resolve_correlated_in_expr(lhs, row, ctx, cancel, memo.as_deref_mut())?;
6903                self.resolve_correlated_in_expr(rhs, row, ctx, cancel, memo.as_deref_mut())?;
6904            }
6905            Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
6906                self.resolve_correlated_in_expr(expr, row, ctx, cancel, memo.as_deref_mut())?;
6907            }
6908            Expr::Like { expr, pattern, .. } => {
6909                self.resolve_correlated_in_expr(expr, row, ctx, cancel, memo.as_deref_mut())?;
6910                self.resolve_correlated_in_expr(pattern, row, ctx, cancel, memo.as_deref_mut())?;
6911            }
6912            Expr::FunctionCall { args, .. } => {
6913                for a in args {
6914                    self.resolve_correlated_in_expr(a, row, ctx, cancel, memo.as_deref_mut())?;
6915                }
6916            }
6917            Expr::Extract { source, .. } => {
6918                self.resolve_correlated_in_expr(source, row, ctx, cancel, memo.as_deref_mut())?;
6919            }
6920            Expr::WindowFunction { .. }
6921            | Expr::Literal(_)
6922            | Expr::Placeholder(_)
6923            | Expr::Column(_) => {}
6924            // v7.10.10 — recurse children.
6925            Expr::Array(items) => {
6926                for elem in items {
6927                    self.resolve_correlated_in_expr(elem, row, ctx, cancel, memo.as_deref_mut())?;
6928                }
6929            }
6930            Expr::ArraySubscript { target, index } => {
6931                self.resolve_correlated_in_expr(target, row, ctx, cancel, memo.as_deref_mut())?;
6932                self.resolve_correlated_in_expr(index, row, ctx, cancel, memo.as_deref_mut())?;
6933            }
6934            Expr::AnyAll { expr, array, .. } => {
6935                self.resolve_correlated_in_expr(expr, row, ctx, cancel, memo.as_deref_mut())?;
6936                self.resolve_correlated_in_expr(array, row, ctx, cancel, memo.as_deref_mut())?;
6937            }
6938            Expr::Case {
6939                operand,
6940                branches,
6941                else_branch,
6942            } => {
6943                if let Some(o) = operand {
6944                    self.resolve_correlated_in_expr(o, row, ctx, cancel, memo.as_deref_mut())?;
6945                }
6946                for (w, t) in branches {
6947                    self.resolve_correlated_in_expr(w, row, ctx, cancel, memo.as_deref_mut())?;
6948                    self.resolve_correlated_in_expr(t, row, ctx, cancel, memo.as_deref_mut())?;
6949                }
6950                if let Some(e) = else_branch {
6951                    self.resolve_correlated_in_expr(e, row, ctx, cancel, memo.as_deref_mut())?;
6952                }
6953            }
6954        }
6955        Ok(())
6956    }
6957
6958    fn subquery_replacement(
6959        &self,
6960        e: &Expr,
6961        cancel: CancelToken<'_>,
6962    ) -> Result<Option<Expr>, EngineError> {
6963        match e {
6964            Expr::ScalarSubquery(inner) => {
6965                let mut s = (**inner).clone();
6966                // Recurse into the inner SELECT first so nested
6967                // subqueries materialise bottom-up.
6968                self.resolve_select_subqueries(&mut s, cancel)?;
6969                let r = match self.exec_bare_select_cancel(&s, cancel) {
6970                    Ok(r) => r,
6971                    Err(e) if is_correlation_error(&e) => return Ok(None),
6972                    Err(e) => return Err(e),
6973                };
6974                let QueryResult::Rows { rows, .. } = r else {
6975                    return Err(EngineError::Unsupported(
6976                        "scalar subquery: inner statement did not return rows".into(),
6977                    ));
6978                };
6979                let value = match rows.as_slice() {
6980                    [] => Value::Null,
6981                    [row] => row.values.first().cloned().unwrap_or(Value::Null),
6982                    _ => {
6983                        return Err(EngineError::Unsupported(alloc::format!(
6984                            "scalar subquery returned {} rows; expected 0 or 1",
6985                            rows.len()
6986                        )));
6987                    }
6988                };
6989                Ok(Some(value_to_literal_expr(value)?))
6990            }
6991            Expr::Exists { subquery, negated } => {
6992                let mut s = (**subquery).clone();
6993                self.resolve_select_subqueries(&mut s, cancel)?;
6994                let r = match self.exec_bare_select_cancel(&s, cancel) {
6995                    Ok(r) => r,
6996                    Err(e) if is_correlation_error(&e) => return Ok(None),
6997                    Err(e) => return Err(e),
6998                };
6999                let exists = match r {
7000                    QueryResult::Rows { rows, .. } => !rows.is_empty(),
7001                    QueryResult::CommandOk { .. } => false,
7002                };
7003                let bit = if *negated { !exists } else { exists };
7004                Ok(Some(Expr::Literal(Literal::Bool(bit))))
7005            }
7006            Expr::InSubquery {
7007                expr,
7008                subquery,
7009                negated,
7010            } => {
7011                let mut s = (**subquery).clone();
7012                self.resolve_select_subqueries(&mut s, cancel)?;
7013                let r = match self.exec_bare_select_cancel(&s, cancel) {
7014                    Ok(r) => r,
7015                    Err(e) if is_correlation_error(&e) => return Ok(None),
7016                    Err(e) => return Err(e),
7017                };
7018                let QueryResult::Rows { columns, rows, .. } = r else {
7019                    return Err(EngineError::Unsupported(
7020                        "IN-subquery: inner statement did not return rows".into(),
7021                    ));
7022                };
7023                if columns.len() != 1 {
7024                    return Err(EngineError::Unsupported(alloc::format!(
7025                        "IN-subquery must project exactly one column; got {}",
7026                        columns.len()
7027                    )));
7028                }
7029                // Build the same OR-Eq chain the parse-time literal-list
7030                // path constructs, with each value lifted into a Literal.
7031                let mut acc: Option<Expr> = None;
7032                for row in rows {
7033                    let v = row.values.into_iter().next().unwrap_or(Value::Null);
7034                    let lit = value_to_literal_expr(v)?;
7035                    let cmp = Expr::Binary {
7036                        lhs: expr.clone(),
7037                        op: BinOp::Eq,
7038                        rhs: Box::new(lit),
7039                    };
7040                    acc = Some(match acc {
7041                        None => cmp,
7042                        Some(prev) => Expr::Binary {
7043                            lhs: Box::new(prev),
7044                            op: BinOp::Or,
7045                            rhs: Box::new(cmp),
7046                        },
7047                    });
7048                }
7049                let combined = acc.unwrap_or(Expr::Literal(Literal::Bool(false)));
7050                let final_expr = if *negated {
7051                    Expr::Unary {
7052                        op: UnOp::Not,
7053                        expr: Box::new(combined),
7054                    }
7055                } else {
7056                    combined
7057                };
7058                Ok(Some(final_expr))
7059            }
7060            _ => Ok(None),
7061        }
7062    }
7063}
7064
7065// ---- v4.12 window-function helpers ----
7066// The (partition-key, order-key, original-index) tuple shape used
7067// across these helpers is intrinsic to the planner. Factoring it
7068// into a typedef adds indirection without making the code clearer,
7069// so several lints are allowed inline on the affected functions
7070// rather than module-wide.
7071
7072/// v4.22: cheap structural scan for `FROM <name>` (qualified or
7073/// not) inside a SELECT — used to verify the anchor of a WITH
7074/// RECURSIVE CTE doesn't recurse into itself. Conservative: walks
7075/// FROM joins, subqueries, and unions.
7076fn select_refers_to(stmt: &SelectStatement, target: &str) -> bool {
7077    if let Some(from) = &stmt.from
7078        && from_refers_to(from, target)
7079    {
7080        return true;
7081    }
7082    for (_, peer) in &stmt.unions {
7083        if select_refers_to(peer, target) {
7084            return true;
7085        }
7086    }
7087    for item in &stmt.items {
7088        if let SelectItem::Expr { expr, .. } = item
7089            && expr_refers_to(expr, target)
7090        {
7091            return true;
7092        }
7093    }
7094    if let Some(w) = &stmt.where_
7095        && expr_refers_to(w, target)
7096    {
7097        return true;
7098    }
7099    false
7100}
7101
7102fn from_refers_to(from: &FromClause, target: &str) -> bool {
7103    if from.primary.name.eq_ignore_ascii_case(target) {
7104        return true;
7105    }
7106    from.joins
7107        .iter()
7108        .any(|j| j.table.name.eq_ignore_ascii_case(target))
7109}
7110
7111fn expr_refers_to(e: &Expr, target: &str) -> bool {
7112    match e {
7113        Expr::ScalarSubquery(s) => select_refers_to(s, target),
7114        Expr::Exists { subquery, .. } | Expr::InSubquery { subquery, .. } => {
7115            select_refers_to(subquery, target)
7116        }
7117        Expr::Binary { lhs, rhs, .. } => expr_refers_to(lhs, target) || expr_refers_to(rhs, target),
7118        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
7119            expr_refers_to(expr, target)
7120        }
7121        Expr::Like { expr, pattern, .. } => {
7122            expr_refers_to(expr, target) || expr_refers_to(pattern, target)
7123        }
7124        Expr::FunctionCall { args, .. } => args.iter().any(|a| expr_refers_to(a, target)),
7125        Expr::Extract { source, .. } => expr_refers_to(source, target),
7126        Expr::WindowFunction {
7127            args,
7128            partition_by,
7129            order_by,
7130            ..
7131        } => {
7132            args.iter().any(|a| expr_refers_to(a, target))
7133                || partition_by.iter().any(|p| expr_refers_to(p, target))
7134                || order_by.iter().any(|(o, _)| expr_refers_to(o, target))
7135        }
7136        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => false,
7137        Expr::Array(items) => items.iter().any(|e| expr_refers_to(e, target)),
7138        Expr::ArraySubscript { target: t, index } => {
7139            expr_refers_to(t, target) || expr_refers_to(index, target)
7140        }
7141        Expr::AnyAll { expr, array, .. } => {
7142            expr_refers_to(expr, target) || expr_refers_to(array, target)
7143        }
7144        Expr::Case {
7145            operand,
7146            branches,
7147            else_branch,
7148        } => {
7149            operand.as_deref().is_some_and(|o| expr_refers_to(o, target))
7150                || branches
7151                    .iter()
7152                    .any(|(w, t)| expr_refers_to(w, target) || expr_refers_to(t, target))
7153                || else_branch
7154                    .as_deref()
7155                    .is_some_and(|e| expr_refers_to(e, target))
7156        }
7157    }
7158}
7159
7160/// v4.22: pick more specific column types from observed rows when
7161/// the projection builder defaulted to Text (the v1.x behavior for
7162/// non-column expressions). Lets `WITH t(n) AS (SELECT 1 ...)`
7163/// land an Int column in the CTE storage table rather than failing
7164/// the insert with "expected TEXT, got INT".
7165fn infer_column_types(columns: &[ColumnSchema], rows: &[Row]) -> Vec<ColumnSchema> {
7166    let mut out = columns.to_vec();
7167    for (col_idx, col) in out.iter_mut().enumerate() {
7168        if col.ty != DataType::Text {
7169            continue;
7170        }
7171        let mut inferred: Option<DataType> = None;
7172        let mut all_null = true;
7173        for row in rows {
7174            let Some(v) = row.values.get(col_idx) else {
7175                continue;
7176            };
7177            let ty = match v {
7178                Value::Null => continue,
7179                Value::SmallInt(_) => DataType::SmallInt,
7180                Value::Int(_) => DataType::Int,
7181                Value::BigInt(_) => DataType::BigInt,
7182                Value::Float(_) => DataType::Float,
7183                Value::Bool(_) => DataType::Bool,
7184                Value::Vector(_) => DataType::Vector {
7185                    dim: 0,
7186                    encoding: VecEncoding::F32,
7187                },
7188                _ => DataType::Text,
7189            };
7190            all_null = false;
7191            inferred = Some(match inferred {
7192                None => ty,
7193                Some(prev) if prev == ty => prev,
7194                Some(_) => DataType::Text,
7195            });
7196        }
7197        if let Some(t) = inferred {
7198            col.ty = t;
7199            col.nullable = true;
7200        } else if all_null {
7201            col.nullable = true;
7202        }
7203    }
7204    out
7205}
7206
7207/// v4.26: render a human-readable plan tree for `EXPLAIN <select>`.
7208/// Lines are pushed into `out`; `depth` controls indentation. We
7209/// describe the rewritten SELECT — what the executor *would* do —
7210/// using the engine handle to spot indexed lookups and table shapes.
7211#[allow(clippy::too_many_lines, clippy::format_push_string)]
7212/// v6.2.4 — Walk every line of the rendered plan tree and append
7213/// per-operator stats. Lines that name a known operator get
7214/// `(rows=N)` (`actual_rows` of the top-level operator equals the
7215/// final result row count; scans report their catalog row count
7216/// as the rows-considered metric). Other lines — Filter / Join /
7217/// GroupBy / OrderBy etc. — are marked `(—)` so the surface is
7218/// complete-by-construction; v6.2.5 fills these in via inline
7219/// executor counters.
7220/// v6.8.3 — surface "CREATE INDEX …" suggestions for every
7221/// `(table, column)` pair the query touches via WHERE / JOIN
7222/// that doesn't already have an index on the owning table.
7223/// Walks the SELECT's FROM clauses + WHERE expression tree;
7224/// returns one line per missing index. Deterministic order:
7225/// FROM-clause iteration order, then column-reference walk
7226/// order inside each WHERE. Each suggestion is a copy-pastable
7227/// DDL string.
7228fn build_index_suggestions(stmt: &SelectStatement, engine: &Engine) -> Vec<String> {
7229    use alloc::collections::BTreeSet;
7230    let mut seen: BTreeSet<(String, String)> = BTreeSet::new();
7231    let mut out: Vec<String> = Vec::new();
7232    let cat = engine.active_catalog();
7233    // Build a (table, qualifier-or-alias) list from the FROM clause
7234    // so unqualified column refs in WHERE resolve to the correct
7235    // table.
7236    let Some(from) = &stmt.from else {
7237        return out;
7238    };
7239    let mut tables: Vec<String> = Vec::new();
7240    tables.push(from.primary.name.clone());
7241    for j in &from.joins {
7242        tables.push(j.table.name.clone());
7243    }
7244    // Collect column refs from the WHERE expression. JOIN ON
7245    // predicates also feed in.
7246    let mut col_refs: Vec<spg_sql::ast::ColumnName> = Vec::new();
7247    if let Some(w) = &stmt.where_ {
7248        collect_column_refs(w, &mut col_refs);
7249    }
7250    for j in &from.joins {
7251        if let Some(on) = &j.on {
7252            collect_column_refs(on, &mut col_refs);
7253        }
7254    }
7255    for cn in &col_refs {
7256        // Resolve owner table: explicit qualifier first, else
7257        // first table in FROM that has a column of this name.
7258        let owner: Option<String> = if let Some(q) = &cn.qualifier {
7259            tables.iter().find(|t| t == &q).cloned()
7260        } else {
7261            tables.iter().find_map(|t| {
7262                cat.get(t).and_then(|tbl| {
7263                    if tbl.schema().column_position(&cn.name).is_some() {
7264                        Some(t.clone())
7265                    } else {
7266                        None
7267                    }
7268                })
7269            })
7270        };
7271        let Some(owner) = owner else {
7272            continue;
7273        };
7274        let Some(tbl) = cat.get(&owner) else {
7275            continue;
7276        };
7277        let Some(col_pos) = tbl.schema().column_position(&cn.name) else {
7278            continue;
7279        };
7280        // Skip if any BTree index already covers this column as
7281        // its key.
7282        let already_indexed = tbl.indices().iter().any(|i| {
7283            matches!(i.kind, spg_storage::IndexKind::BTree(_))
7284                && i.column_position == col_pos
7285                && i.expression.is_none()
7286                && i.partial_predicate.is_none()
7287        });
7288        if already_indexed {
7289            continue;
7290        }
7291        if seen.insert((owner.clone(), cn.name.clone())) {
7292            out.push(alloc::format!(
7293                "SUGGEST: CREATE INDEX ix_{}_{} ON {} ({})",
7294                owner,
7295                cn.name,
7296                owner,
7297                cn.name
7298            ));
7299        }
7300    }
7301    out
7302}
7303
7304/// Walks an `Expr` and pushes every `ColumnName` it references.
7305/// Order is depth-first, left-to-right.
7306fn collect_column_refs(expr: &Expr, out: &mut Vec<spg_sql::ast::ColumnName>) {
7307    match expr {
7308        Expr::Column(cn) => out.push(cn.clone()),
7309        Expr::FunctionCall { args, .. } => {
7310            for a in args {
7311                collect_column_refs(a, out);
7312            }
7313        }
7314        Expr::Binary { lhs, rhs, .. } => {
7315            collect_column_refs(lhs, out);
7316            collect_column_refs(rhs, out);
7317        }
7318        Expr::Unary { expr: e, .. } => collect_column_refs(e, out),
7319        _ => {}
7320    }
7321}
7322
7323fn annotate_explain_lines(lines: &mut [String], total_rows: usize, engine: &Engine) {
7324    let catalog = engine.active_catalog();
7325    let cold_ids = catalog.cold_segment_ids_global();
7326    let any_cold = !cold_ids.is_empty();
7327    let cold_ids_repr = if any_cold {
7328        let mut s = alloc::string::String::from("[");
7329        for (i, id) in cold_ids.iter().enumerate() {
7330            if i > 0 {
7331                s.push(',');
7332            }
7333            s.push_str(&alloc::format!("{id}"));
7334        }
7335        s.push(']');
7336        s
7337    } else {
7338        alloc::string::String::new()
7339    };
7340    for (idx, line) in lines.iter_mut().enumerate() {
7341        let trimmed = line.trim_start();
7342        let is_top_level = idx == 0;
7343        if is_top_level {
7344            line.push_str(&alloc::format!(" (rows={total_rows})"));
7345            continue;
7346        }
7347        if let Some(rest) = trimmed.strip_prefix("From: ") {
7348            let (name, scan_kind) = match rest.split_once(" [") {
7349                Some((n, k)) => (n.trim(), k.trim_end_matches(']')),
7350                None => (rest.trim(), ""),
7351            };
7352            let bare = name.split_whitespace().next().unwrap_or(name);
7353            let hot = catalog.get(bare).map(|t| t.rows().len());
7354            // v6.2.7 — `cold_segments=[id0,id1,…]` enumerates every
7355            // cold-tier segment the scan COULD have walked. v6.2.x
7356            // can tighten to per-table by walking the table's
7357            // BTree-index cold locators.
7358            let annot = match (hot, scan_kind) {
7359                (Some(h), "full scan") => {
7360                    let mut s = alloc::format!(" (hot_rows={h}");
7361                    if any_cold {
7362                        s.push_str(&alloc::format!(
7363                            ", cold_tier=present, cold_segments={cold_ids_repr}"
7364                        ));
7365                    }
7366                    s.push(')');
7367                    s
7368                }
7369                (Some(h), "index seek") => {
7370                    let mut s = alloc::format!(" (hot_rows≤{h}");
7371                    if any_cold {
7372                        s.push_str(&alloc::format!(
7373                            ", cold_tier=present, cold_segments={cold_ids_repr}"
7374                        ));
7375                    }
7376                    s.push(')');
7377                    s
7378                }
7379                _ => " (rows=—)".to_string(),
7380            };
7381            line.push_str(&annot);
7382            continue;
7383        }
7384        // Filter / GroupBy / Having / OrderBy / Limit / Join etc.
7385        line.push_str(" (rows=—)");
7386    }
7387}
7388
7389fn explain_select(stmt: &SelectStatement, engine: &Engine, depth: usize, out: &mut Vec<String>) {
7390    let pad = "  ".repeat(depth);
7391    // 1) Top-level operator label.
7392    let top = if !stmt.ctes.is_empty() {
7393        if stmt.ctes.iter().any(|c| c.recursive) {
7394            "CTEScan (WITH RECURSIVE)"
7395        } else {
7396            "CTEScan (WITH)"
7397        }
7398    } else if !stmt.unions.is_empty() {
7399        "UnionScan"
7400    } else if select_has_window(stmt) {
7401        "WindowAgg"
7402    } else if aggregate::uses_aggregate(stmt) {
7403        "Aggregate"
7404    } else if stmt.distinct {
7405        "Distinct"
7406    } else if stmt.from.is_some() {
7407        "TableScan"
7408    } else {
7409        "Result"
7410    };
7411    out.push(alloc::format!("{pad}{top}"));
7412    let child = "  ".repeat(depth + 1);
7413    // 2) CTE bodies.
7414    for cte in &stmt.ctes {
7415        let head = if cte.recursive {
7416            alloc::format!("{child}CTE (recursive): {}", cte.name)
7417        } else {
7418            alloc::format!("{child}CTE: {}", cte.name)
7419        };
7420        out.push(head);
7421        explain_select(&cte.body, engine, depth + 2, out);
7422    }
7423    // 3) FROM details — primary table + joins, index hits.
7424    if let Some(from) = &stmt.from {
7425        let mut tag = alloc::format!("{child}From: {}", from.primary.name);
7426        if let Some(alias) = &from.primary.alias {
7427            tag.push_str(&alloc::format!(" AS {alias}"));
7428        }
7429        // Try to detect an index-seek opportunity on WHERE against
7430        // the primary table — same heuristic the executor uses.
7431        if let Some(w) = &stmt.where_
7432            && let Some(table) = engine.active_catalog().get(&from.primary.name)
7433        {
7434            let alias = from.primary.alias.as_deref().unwrap_or(&from.primary.name);
7435            let cols = &table.schema().columns;
7436            if try_index_seek(w, cols, engine.active_catalog(), table, alias).is_some() {
7437                tag.push_str(" [index seek]");
7438            } else {
7439                tag.push_str(" [full scan]");
7440            }
7441        } else {
7442            tag.push_str(" [full scan]");
7443        }
7444        out.push(tag);
7445        for j in &from.joins {
7446            let kind = match j.kind {
7447                spg_sql::ast::JoinKind::Inner => "INNER JOIN",
7448                spg_sql::ast::JoinKind::Left => "LEFT JOIN",
7449                spg_sql::ast::JoinKind::Cross => "CROSS JOIN",
7450            };
7451            let mut s = alloc::format!("{child}{kind}: {}", j.table.name);
7452            if let Some(alias) = &j.table.alias {
7453                s.push_str(&alloc::format!(" AS {alias}"));
7454            }
7455            if j.on.is_some() {
7456                s.push_str(" (ON …)");
7457            }
7458            out.push(s);
7459        }
7460    }
7461    // 4) WHERE / GROUP BY / HAVING / ORDER BY / LIMIT / OFFSET.
7462    if let Some(w) = &stmt.where_ {
7463        let mut s = alloc::format!("{child}Filter: {w}");
7464        if expr_has_subquery(w) {
7465            s.push_str(" [subquery]");
7466        }
7467        out.push(s);
7468    }
7469    if let Some(gs) = &stmt.group_by {
7470        let mut parts = Vec::new();
7471        for g in gs {
7472            parts.push(alloc::format!("{g}"));
7473        }
7474        out.push(alloc::format!("{child}GroupBy: {}", parts.join(", ")));
7475    }
7476    if let Some(h) = &stmt.having {
7477        out.push(alloc::format!("{child}Having: {h}"));
7478    }
7479    for o in &stmt.order_by {
7480        let dir = if o.desc { "DESC" } else { "ASC" };
7481        out.push(alloc::format!("{child}OrderBy: {} {dir}", o.expr));
7482    }
7483    if let Some(lim) = stmt.limit {
7484        out.push(alloc::format!("{child}Limit: {lim}"));
7485    }
7486    if let Some(off) = stmt.offset {
7487        out.push(alloc::format!("{child}Offset: {off}"));
7488    }
7489    // 5) Projection — collapse Wildcard or render N items.
7490    if stmt
7491        .items
7492        .iter()
7493        .any(|it| matches!(it, SelectItem::Wildcard))
7494    {
7495        out.push(alloc::format!("{child}Project: *"));
7496    } else {
7497        out.push(alloc::format!(
7498            "{child}Project: {} item(s)",
7499            stmt.items.len()
7500        ));
7501    }
7502    // 6) Recurse into UNION peers.
7503    for (kind, peer) in &stmt.unions {
7504        let label = match kind {
7505            UnionKind::All => "UNION ALL",
7506            UnionKind::Distinct => "UNION",
7507        };
7508        out.push(alloc::format!("{child}{label}"));
7509        explain_select(peer, engine, depth + 2, out);
7510    }
7511}
7512
7513/// v4.23: recognise the engine errors that indicate the inner
7514/// SELECT couldn't be evaluated in isolation because it references
7515/// an outer column — used by `subquery_replacement` to skip
7516/// materialisation and let row-eval handle it instead.
7517fn is_correlation_error(e: &EngineError) -> bool {
7518    matches!(
7519        e,
7520        EngineError::Eval(
7521            eval::EvalError::ColumnNotFound { .. } | eval::EvalError::UnknownQualifier { .. }
7522        )
7523    )
7524}
7525
7526/// v4.23: walk every Expr in `stmt` and replace each Column ref
7527/// that targets the outer scope (qualifier matches the outer
7528/// table alias) with a Literal carrying the outer row's value.
7529/// Conservative: only qualified refs are substituted, so the user
7530/// must write `outer_alias.col` to reference an outer column. This
7531/// matches PG's lexical scoping for correlated subqueries and
7532/// avoids accidentally rebinding inner columns of the same name.
7533fn substitute_outer_columns(stmt: &mut SelectStatement, row: &Row, ctx: &EvalContext<'_>) {
7534    let Some(outer_alias) = ctx.table_alias else {
7535        return;
7536    };
7537    substitute_in_select(stmt, row, ctx, outer_alias);
7538}
7539
7540fn substitute_in_select(
7541    stmt: &mut SelectStatement,
7542    row: &Row,
7543    ctx: &EvalContext<'_>,
7544    outer_alias: &str,
7545) {
7546    for item in &mut stmt.items {
7547        if let SelectItem::Expr { expr, .. } = item {
7548            substitute_in_expr(expr, row, ctx, outer_alias);
7549        }
7550    }
7551    if let Some(w) = &mut stmt.where_ {
7552        substitute_in_expr(w, row, ctx, outer_alias);
7553    }
7554    if let Some(gs) = &mut stmt.group_by {
7555        for g in gs {
7556            substitute_in_expr(g, row, ctx, outer_alias);
7557        }
7558    }
7559    if let Some(h) = &mut stmt.having {
7560        substitute_in_expr(h, row, ctx, outer_alias);
7561    }
7562    for o in &mut stmt.order_by {
7563        substitute_in_expr(&mut o.expr, row, ctx, outer_alias);
7564    }
7565    for (_, peer) in &mut stmt.unions {
7566        substitute_in_select(peer, row, ctx, outer_alias);
7567    }
7568}
7569
7570fn substitute_in_expr(e: &mut Expr, row: &Row, ctx: &EvalContext<'_>, outer_alias: &str) {
7571    if let Expr::Column(c) = e
7572        && let Some(qual) = &c.qualifier
7573        && qual.eq_ignore_ascii_case(outer_alias)
7574    {
7575        // Look up the column's index in the outer schema.
7576        if let Some(idx) = ctx
7577            .columns
7578            .iter()
7579            .position(|sc| sc.name.eq_ignore_ascii_case(&c.name))
7580        {
7581            let v = row.values.get(idx).cloned().unwrap_or(Value::Null);
7582            if let Ok(lit) = value_to_literal_expr(v) {
7583                *e = lit;
7584                return;
7585            }
7586        }
7587    }
7588    match e {
7589        Expr::Binary { lhs, rhs, .. } => {
7590            substitute_in_expr(lhs, row, ctx, outer_alias);
7591            substitute_in_expr(rhs, row, ctx, outer_alias);
7592        }
7593        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
7594            substitute_in_expr(expr, row, ctx, outer_alias);
7595        }
7596        Expr::Like { expr, pattern, .. } => {
7597            substitute_in_expr(expr, row, ctx, outer_alias);
7598            substitute_in_expr(pattern, row, ctx, outer_alias);
7599        }
7600        Expr::FunctionCall { args, .. } => {
7601            for a in args {
7602                substitute_in_expr(a, row, ctx, outer_alias);
7603            }
7604        }
7605        Expr::Extract { source, .. } => substitute_in_expr(source, row, ctx, outer_alias),
7606        Expr::WindowFunction {
7607            args,
7608            partition_by,
7609            order_by,
7610            ..
7611        } => {
7612            for a in args {
7613                substitute_in_expr(a, row, ctx, outer_alias);
7614            }
7615            for p in partition_by {
7616                substitute_in_expr(p, row, ctx, outer_alias);
7617            }
7618            for (o, _) in order_by {
7619                substitute_in_expr(o, row, ctx, outer_alias);
7620            }
7621        }
7622        Expr::ScalarSubquery(s) => substitute_in_select(s, row, ctx, outer_alias),
7623        Expr::Exists { subquery, .. } | Expr::InSubquery { subquery, .. } => {
7624            substitute_in_select(subquery, row, ctx, outer_alias);
7625        }
7626        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => {}
7627        Expr::Array(items) => {
7628            for elem in items {
7629                substitute_in_expr(elem, row, ctx, outer_alias);
7630            }
7631        }
7632        Expr::ArraySubscript { target, index } => {
7633            substitute_in_expr(target, row, ctx, outer_alias);
7634            substitute_in_expr(index, row, ctx, outer_alias);
7635        }
7636        Expr::AnyAll { expr, array, .. } => {
7637            substitute_in_expr(expr, row, ctx, outer_alias);
7638            substitute_in_expr(array, row, ctx, outer_alias);
7639        }
7640        Expr::Case {
7641            operand,
7642            branches,
7643            else_branch,
7644        } => {
7645            if let Some(o) = operand {
7646                substitute_in_expr(o, row, ctx, outer_alias);
7647            }
7648            for (w, t) in branches {
7649                substitute_in_expr(w, row, ctx, outer_alias);
7650                substitute_in_expr(t, row, ctx, outer_alias);
7651            }
7652            if let Some(e) = else_branch {
7653                substitute_in_expr(e, row, ctx, outer_alias);
7654            }
7655        }
7656    }
7657}
7658
7659/// v4.22: encode a Row to a comparable byte key for UNION-DISTINCT
7660/// dedup inside the recursive iteration. Crude but deterministic
7661/// — Debug prints embed type discriminants so NULL ≠ "" ≠ 0.
7662fn encode_row_key(row: &Row) -> Vec<u8> {
7663    let mut out = Vec::new();
7664    for v in &row.values {
7665        let s = alloc::format!("{v:?}|");
7666        out.extend_from_slice(s.as_bytes());
7667    }
7668    out
7669}
7670
7671fn select_has_window(stmt: &SelectStatement) -> bool {
7672    for item in &stmt.items {
7673        if let SelectItem::Expr { expr, .. } = item
7674            && expr_has_window(expr)
7675        {
7676            return true;
7677        }
7678    }
7679    false
7680}
7681
7682fn expr_has_window(e: &Expr) -> bool {
7683    match e {
7684        Expr::WindowFunction { .. } => true,
7685        Expr::Binary { lhs, rhs, .. } => expr_has_window(lhs) || expr_has_window(rhs),
7686        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
7687            expr_has_window(expr)
7688        }
7689        Expr::FunctionCall { args, .. } => args.iter().any(expr_has_window),
7690        Expr::Like { expr, pattern, .. } => expr_has_window(expr) || expr_has_window(pattern),
7691        Expr::Extract { source, .. } => expr_has_window(source),
7692        Expr::ScalarSubquery(_)
7693        | Expr::Exists { .. }
7694        | Expr::InSubquery { .. }
7695        | Expr::Literal(_)
7696        | Expr::Placeholder(_)
7697        | Expr::Column(_) => false,
7698        Expr::Array(items) => items.iter().any(expr_has_window),
7699        Expr::ArraySubscript { target, index } => expr_has_window(target) || expr_has_window(index),
7700        Expr::AnyAll { expr, array, .. } => expr_has_window(expr) || expr_has_window(array),
7701        Expr::Case {
7702            operand,
7703            branches,
7704            else_branch,
7705        } => {
7706            operand.as_deref().is_some_and(expr_has_window)
7707                || branches
7708                    .iter()
7709                    .any(|(w, t)| expr_has_window(w) || expr_has_window(t))
7710                || else_branch.as_deref().is_some_and(expr_has_window)
7711        }
7712    }
7713}
7714
7715fn collect_window_nodes(e: &Expr, out: &mut Vec<Expr>) {
7716    if let Expr::WindowFunction { .. } = e {
7717        // Deduplicate by structural equality on the expression
7718        // (cheap because window args + partition + order are
7719        // small). Without dedup we'd recompute identical windows
7720        // once per occurrence in the projection.
7721        if !out.iter().any(|x| x == e) {
7722            out.push(e.clone());
7723        }
7724        return;
7725    }
7726    match e {
7727        // Already handled by the early-return at the top.
7728        Expr::WindowFunction { .. } => unreachable!(),
7729        Expr::Binary { lhs, rhs, .. } => {
7730            collect_window_nodes(lhs, out);
7731            collect_window_nodes(rhs, out);
7732        }
7733        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
7734            collect_window_nodes(expr, out);
7735        }
7736        Expr::FunctionCall { args, .. } => {
7737            for a in args {
7738                collect_window_nodes(a, out);
7739            }
7740        }
7741        Expr::Like { expr, pattern, .. } => {
7742            collect_window_nodes(expr, out);
7743            collect_window_nodes(pattern, out);
7744        }
7745        Expr::Extract { source, .. } => collect_window_nodes(source, out),
7746        _ => {}
7747    }
7748}
7749
7750fn rewrite_window_to_columns(e: &mut Expr, window_nodes: &[Expr]) {
7751    if let Expr::WindowFunction { .. } = e
7752        && let Some(idx) = window_nodes.iter().position(|w| w == e)
7753    {
7754        *e = Expr::Column(spg_sql::ast::ColumnName {
7755            qualifier: None,
7756            name: alloc::format!("__win_{idx}"),
7757        });
7758        return;
7759    }
7760    match e {
7761        Expr::Binary { lhs, rhs, .. } => {
7762            rewrite_window_to_columns(lhs, window_nodes);
7763            rewrite_window_to_columns(rhs, window_nodes);
7764        }
7765        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
7766            rewrite_window_to_columns(expr, window_nodes);
7767        }
7768        Expr::FunctionCall { args, .. } => {
7769            for a in args {
7770                rewrite_window_to_columns(a, window_nodes);
7771            }
7772        }
7773        Expr::Like { expr, pattern, .. } => {
7774            rewrite_window_to_columns(expr, window_nodes);
7775            rewrite_window_to_columns(pattern, window_nodes);
7776        }
7777        Expr::Extract { source, .. } => rewrite_window_to_columns(source, window_nodes),
7778        _ => {}
7779    }
7780}
7781
7782/// Total order over partition-key tuples. NULL sorts as the
7783/// lowest value (matches the `<` partial order's NULL-last
7784/// behaviour with `INFINITY` flipped).
7785fn partition_key_cmp(a: &[Value], b: &[Value]) -> core::cmp::Ordering {
7786    for (x, y) in a.iter().zip(b.iter()) {
7787        let c = value_cmp(x, y);
7788        if c != core::cmp::Ordering::Equal {
7789            return c;
7790        }
7791    }
7792    a.len().cmp(&b.len())
7793}
7794
7795fn order_key_cmp(a: &[(Value, bool)], b: &[(Value, bool)]) -> core::cmp::Ordering {
7796    for ((va, desc), (vb, _)) in a.iter().zip(b.iter()) {
7797        let c = value_cmp(va, vb);
7798        let c = if *desc { c.reverse() } else { c };
7799        if c != core::cmp::Ordering::Equal {
7800            return c;
7801        }
7802    }
7803    a.len().cmp(&b.len())
7804}
7805
7806#[allow(clippy::match_same_arms)] // explicit arms per type document the supported pairs
7807fn value_cmp(a: &Value, b: &Value) -> core::cmp::Ordering {
7808    use core::cmp::Ordering;
7809    match (a, b) {
7810        (Value::Null, Value::Null) => Ordering::Equal,
7811        (Value::Null, _) => Ordering::Less,
7812        (_, Value::Null) => Ordering::Greater,
7813        (Value::Int(x), Value::Int(y)) => x.cmp(y),
7814        (Value::BigInt(x), Value::BigInt(y)) => x.cmp(y),
7815        (Value::SmallInt(x), Value::SmallInt(y)) => x.cmp(y),
7816        (Value::Text(x), Value::Text(y)) => x.cmp(y),
7817        (Value::Bool(x), Value::Bool(y)) => x.cmp(y),
7818        (Value::Float(x), Value::Float(y)) => x.partial_cmp(y).unwrap_or(Ordering::Equal),
7819        (Value::Date(x), Value::Date(y)) => x.cmp(y),
7820        (Value::Timestamp(x), Value::Timestamp(y)) => x.cmp(y),
7821        // Cross-type compare: fall back to the debug rendering —
7822        // same-partition is the goal, exact order is irrelevant.
7823        _ => alloc::format!("{a:?}").cmp(&alloc::format!("{b:?}")),
7824    }
7825}
7826
7827/// Compute the window function's per-row output for one partition.
7828/// `slice` has (partition key, order key, original-row-index)
7829/// tuples already sorted by order key. `filtered_rows` is the
7830/// full row list indexed by original-row-index. `out_vals` is
7831/// the destination, also indexed by original-row-index.
7832#[allow(
7833    clippy::too_many_arguments,
7834    clippy::cast_possible_truncation,
7835    clippy::cast_possible_wrap,
7836    clippy::cast_precision_loss,
7837    clippy::cast_sign_loss,
7838    clippy::doc_markdown,
7839    clippy::too_many_lines,
7840    clippy::type_complexity,
7841    clippy::match_same_arms
7842)]
7843fn compute_window_partition(
7844    name: &str,
7845    args: &[Expr],
7846    ordered: bool,
7847    frame: Option<&WindowFrame>,
7848    null_treatment: spg_sql::ast::NullTreatment,
7849    slice: &[(Vec<Value>, Vec<(Value, bool)>, usize)],
7850    filtered_rows: &[&Row],
7851    ctx: &EvalContext<'_>,
7852    out_vals: &mut [Value],
7853) -> Result<(), EngineError> {
7854    let ignore_nulls = matches!(null_treatment, spg_sql::ast::NullTreatment::Ignore);
7855    let lower = name.to_ascii_lowercase();
7856    match lower.as_str() {
7857        "row_number" => {
7858            for (rank, (_, _, idx)) in slice.iter().enumerate() {
7859                out_vals[*idx] = Value::BigInt((rank + 1) as i64);
7860            }
7861            Ok(())
7862        }
7863        "rank" => {
7864            let mut prev_key: Option<&[(Value, bool)]> = None;
7865            let mut current_rank: i64 = 1;
7866            for (i, (_, okey, idx)) in slice.iter().enumerate() {
7867                if let Some(p) = prev_key
7868                    && order_key_cmp(p, okey) != core::cmp::Ordering::Equal
7869                {
7870                    current_rank = (i + 1) as i64;
7871                }
7872                if prev_key.is_none() {
7873                    current_rank = 1;
7874                }
7875                out_vals[*idx] = Value::BigInt(current_rank);
7876                prev_key = Some(okey.as_slice());
7877            }
7878            Ok(())
7879        }
7880        "dense_rank" => {
7881            let mut prev_key: Option<&[(Value, bool)]> = None;
7882            let mut current_rank: i64 = 0;
7883            for (_, okey, idx) in slice {
7884                if prev_key.is_none_or(|p| order_key_cmp(p, okey) != core::cmp::Ordering::Equal) {
7885                    current_rank += 1;
7886                }
7887                out_vals[*idx] = Value::BigInt(current_rank);
7888                prev_key = Some(okey.as_slice());
7889            }
7890            Ok(())
7891        }
7892        "sum" | "avg" | "min" | "max" | "count" | "count_star" => {
7893            // Pre-evaluate the function arg per row in the slice
7894            // (count_star has no arg).
7895            let arg_values: Vec<Value> = if lower == "count_star" || args.is_empty() {
7896                slice.iter().map(|_| Value::Null).collect()
7897            } else {
7898                slice
7899                    .iter()
7900                    .map(|(_, _, idx)| eval::eval_expr(&args[0], filtered_rows[*idx], ctx))
7901                    .collect::<Result<_, _>>()
7902                    .map_err(EngineError::Eval)?
7903            };
7904            // v4.20: pick the effective frame. Explicit frame
7905            // overrides the implicit default (running for ordered,
7906            // whole-partition for unordered).
7907            let eff = effective_frame(frame, ordered)?;
7908            #[allow(clippy::needless_range_loop)]
7909            for i in 0..slice.len() {
7910                let (lo, hi) = frame_bounds_for_row(&eff, i, slice);
7911                let mut sum: f64 = 0.0;
7912                let mut count: i64 = 0;
7913                let mut min_v: Option<f64> = None;
7914                let mut max_v: Option<f64> = None;
7915                let mut row_count: i64 = 0;
7916                if lo <= hi {
7917                    for j in lo..=hi {
7918                        let v = &arg_values[j];
7919                        match lower.as_str() {
7920                            "count_star" => row_count += 1,
7921                            "count" => {
7922                                if !v.is_null() {
7923                                    count += 1;
7924                                }
7925                            }
7926                            _ => {
7927                                if let Some(x) = value_to_f64(v) {
7928                                    sum += x;
7929                                    count += 1;
7930                                    min_v = Some(min_v.map_or(x, |m| m.min(x)));
7931                                    max_v = Some(max_v.map_or(x, |m| m.max(x)));
7932                                }
7933                            }
7934                        }
7935                    }
7936                }
7937                let value = match lower.as_str() {
7938                    "count_star" => Value::BigInt(row_count),
7939                    "count" => Value::BigInt(count),
7940                    "sum" => Value::Float(sum),
7941                    "avg" => {
7942                        if count == 0 {
7943                            Value::Null
7944                        } else {
7945                            Value::Float(sum / count as f64)
7946                        }
7947                    }
7948                    "min" => min_v.map_or(Value::Null, Value::Float),
7949                    "max" => max_v.map_or(Value::Null, Value::Float),
7950                    _ => unreachable!(),
7951                };
7952                let (_, _, idx) = &slice[i];
7953                out_vals[*idx] = value;
7954            }
7955            Ok(())
7956        }
7957        "lag" | "lead" => {
7958            // lag(expr [, offset [, default]])
7959            // lead(expr [, offset [, default]])
7960            if args.is_empty() {
7961                return Err(EngineError::Unsupported(alloc::format!(
7962                    "{lower}() requires at least one argument"
7963                )));
7964            }
7965            let offset: i64 = if args.len() >= 2 {
7966                let v = eval::eval_expr(&args[1], filtered_rows[slice[0].2], ctx)
7967                    .map_err(EngineError::Eval)?;
7968                match v {
7969                    Value::SmallInt(n) => i64::from(n),
7970                    Value::Int(n) => i64::from(n),
7971                    Value::BigInt(n) => n,
7972                    _ => {
7973                        return Err(EngineError::Unsupported(alloc::format!(
7974                            "{lower}() offset must be integer"
7975                        )));
7976                    }
7977                }
7978            } else {
7979                1
7980            };
7981            let default: Value = if args.len() >= 3 {
7982                eval::eval_expr(&args[2], filtered_rows[slice[0].2], ctx)
7983                    .map_err(EngineError::Eval)?
7984            } else {
7985                Value::Null
7986            };
7987            let values: Vec<Value> = slice
7988                .iter()
7989                .map(|(_, _, idx)| eval::eval_expr(&args[0], filtered_rows[*idx], ctx))
7990                .collect::<Result<_, _>>()
7991                .map_err(EngineError::Eval)?;
7992            let n = slice.len();
7993            for (i, (_, _, idx)) in slice.iter().enumerate() {
7994                let signed_offset = if lower == "lag" { -offset } else { offset };
7995                let v = if ignore_nulls {
7996                    // v6.4.2 — IGNORE NULLS: walk in the offset direction
7997                    // skipping NULL values; the `offset`-th non-NULL
7998                    // encountered is the result.
7999                    let step: i64 = if signed_offset >= 0 { 1 } else { -1 };
8000                    let needed: i64 = signed_offset.abs();
8001                    if needed == 0 {
8002                        values[i].clone()
8003                    } else {
8004                        let mut j: i64 = i as i64;
8005                        let mut hits: i64 = 0;
8006                        let mut found: Option<Value> = None;
8007                        loop {
8008                            j += step;
8009                            if j < 0 || j >= n as i64 {
8010                                break;
8011                            }
8012                            #[allow(clippy::cast_sign_loss)]
8013                            let v = &values[j as usize];
8014                            if !v.is_null() {
8015                                hits += 1;
8016                                if hits == needed {
8017                                    found = Some(v.clone());
8018                                    break;
8019                                }
8020                            }
8021                        }
8022                        found.unwrap_or_else(|| default.clone())
8023                    }
8024                } else {
8025                    let target_signed = i64::try_from(i).unwrap_or(i64::MAX) + signed_offset;
8026                    if target_signed < 0 || target_signed >= i64::try_from(n).unwrap_or(i64::MAX) {
8027                        default.clone()
8028                    } else {
8029                        #[allow(clippy::cast_sign_loss)]
8030                        {
8031                            values[target_signed as usize].clone()
8032                        }
8033                    }
8034                };
8035                out_vals[*idx] = v;
8036            }
8037            Ok(())
8038        }
8039        "first_value" | "last_value" | "nth_value" => {
8040            if args.is_empty() {
8041                return Err(EngineError::Unsupported(alloc::format!(
8042                    "{lower}() requires at least one argument"
8043                )));
8044            }
8045            let values: Vec<Value> = slice
8046                .iter()
8047                .map(|(_, _, idx)| eval::eval_expr(&args[0], filtered_rows[*idx], ctx))
8048                .collect::<Result<_, _>>()
8049                .map_err(EngineError::Eval)?;
8050            let nth: usize = if lower == "nth_value" {
8051                if args.len() < 2 {
8052                    return Err(EngineError::Unsupported(
8053                        "nth_value() requires (expr, n)".into(),
8054                    ));
8055                }
8056                let v = eval::eval_expr(&args[1], filtered_rows[slice[0].2], ctx)
8057                    .map_err(EngineError::Eval)?;
8058                let raw = match v {
8059                    Value::SmallInt(n) => i64::from(n),
8060                    Value::Int(n) => i64::from(n),
8061                    Value::BigInt(n) => n,
8062                    _ => {
8063                        return Err(EngineError::Unsupported(
8064                            "nth_value() n must be integer".into(),
8065                        ));
8066                    }
8067                };
8068                if raw < 1 {
8069                    return Err(EngineError::Unsupported(
8070                        "nth_value() n must be >= 1".into(),
8071                    ));
8072                }
8073                #[allow(clippy::cast_sign_loss)]
8074                {
8075                    raw as usize
8076                }
8077            } else {
8078                0
8079            };
8080            let eff = effective_frame(frame, ordered)?;
8081            for i in 0..slice.len() {
8082                let (lo, hi) = frame_bounds_for_row(&eff, i, slice);
8083                let (_, _, idx) = &slice[i];
8084                let v = if lo > hi {
8085                    Value::Null
8086                } else if ignore_nulls && matches!(lower.as_str(), "first_value" | "last_value") {
8087                    // v6.4.2 — IGNORE NULLS: skip NULL cells when
8088                    // selecting the boundary value within the frame.
8089                    if lower == "first_value" {
8090                        (lo..=hi)
8091                            .find_map(|j| {
8092                                let v = &values[j];
8093                                (!v.is_null()).then(|| v.clone())
8094                            })
8095                            .unwrap_or(Value::Null)
8096                    } else {
8097                        (lo..=hi)
8098                            .rev()
8099                            .find_map(|j| {
8100                                let v = &values[j];
8101                                (!v.is_null()).then(|| v.clone())
8102                            })
8103                            .unwrap_or(Value::Null)
8104                    }
8105                } else {
8106                    match lower.as_str() {
8107                        "first_value" => values[lo].clone(),
8108                        "last_value" => values[hi].clone(),
8109                        "nth_value" => {
8110                            let pos = lo + nth - 1;
8111                            if pos > hi {
8112                                Value::Null
8113                            } else {
8114                                values[pos].clone()
8115                            }
8116                        }
8117                        _ => unreachable!(),
8118                    }
8119                };
8120                out_vals[*idx] = v;
8121            }
8122            Ok(())
8123        }
8124        "ntile" => {
8125            if args.is_empty() {
8126                return Err(EngineError::Unsupported(
8127                    "ntile(n) requires an integer argument".into(),
8128                ));
8129            }
8130            let v = eval::eval_expr(&args[0], filtered_rows[slice[0].2], ctx)
8131                .map_err(EngineError::Eval)?;
8132            let bucket_count: i64 = match v {
8133                Value::SmallInt(n) => i64::from(n),
8134                Value::Int(n) => i64::from(n),
8135                Value::BigInt(n) => n,
8136                _ => {
8137                    return Err(EngineError::Unsupported(
8138                        "ntile() argument must be integer".into(),
8139                    ));
8140                }
8141            };
8142            if bucket_count < 1 {
8143                return Err(EngineError::Unsupported(
8144                    "ntile() argument must be >= 1".into(),
8145                ));
8146            }
8147            #[allow(clippy::cast_sign_loss)]
8148            let buckets = bucket_count as usize;
8149            let n = slice.len();
8150            // Each bucket gets `base` rows; the first `extras` buckets
8151            // get one extra. PG semantics.
8152            let base = n / buckets;
8153            let extras = n % buckets;
8154            let mut bucket: usize = 1;
8155            let mut remaining_in_bucket = if extras > 0 { base + 1 } else { base };
8156            let mut buckets_with_extra_remaining = extras;
8157            for (_, _, idx) in slice {
8158                if remaining_in_bucket == 0 {
8159                    bucket += 1;
8160                    buckets_with_extra_remaining = buckets_with_extra_remaining.saturating_sub(1);
8161                    remaining_in_bucket = if buckets_with_extra_remaining > 0 {
8162                        base + 1
8163                    } else {
8164                        base
8165                    };
8166                    // Edge: if base==0 and extras==0, all rows fit;
8167                    // shouldn't reach here, but guard anyway.
8168                    if remaining_in_bucket == 0 {
8169                        remaining_in_bucket = 1;
8170                    }
8171                }
8172                out_vals[*idx] = Value::BigInt(i64::try_from(bucket).unwrap_or(i64::MAX));
8173                remaining_in_bucket -= 1;
8174            }
8175            Ok(())
8176        }
8177        "percent_rank" => {
8178            // (rank - 1) / (n - 1) where rank is the standard RANK().
8179            // Single-row partitions get 0.
8180            let n = slice.len();
8181            let mut prev_key: Option<&[(Value, bool)]> = None;
8182            let mut current_rank: i64 = 1;
8183            for (i, (_, okey, idx)) in slice.iter().enumerate() {
8184                if let Some(p) = prev_key
8185                    && order_key_cmp(p, okey) != core::cmp::Ordering::Equal
8186                {
8187                    current_rank = i64::try_from(i + 1).unwrap_or(i64::MAX);
8188                }
8189                if prev_key.is_none() {
8190                    current_rank = 1;
8191                }
8192                #[allow(clippy::cast_precision_loss)]
8193                let pr = if n <= 1 {
8194                    0.0
8195                } else {
8196                    (current_rank - 1) as f64 / (n - 1) as f64
8197                };
8198                out_vals[*idx] = Value::Float(pr);
8199                prev_key = Some(okey.as_slice());
8200            }
8201            Ok(())
8202        }
8203        "cume_dist" => {
8204            // # rows up to and including this row's peer group / n.
8205            let n = slice.len();
8206            // First pass: find peer-group-end rank for each row.
8207            for i in 0..slice.len() {
8208                let peer_end = peer_group_end(slice, i);
8209                #[allow(clippy::cast_precision_loss)]
8210                let cd = (peer_end + 1) as f64 / n as f64;
8211                let (_, _, idx) = &slice[i];
8212                out_vals[*idx] = Value::Float(cd);
8213            }
8214            Ok(())
8215        }
8216        other => Err(EngineError::Unsupported(alloc::format!(
8217            "window function {other:?} not supported (v4.21: row_number/rank/dense_rank/sum/avg/count/min/max/lag/lead/first_value/last_value/nth_value/ntile/percent_rank/cume_dist)"
8218        ))),
8219    }
8220}
8221
8222/// v4.20: resolve the user-provided frame down to a normalised
8223/// `(kind, start, end)`. `None` means default — derive from
8224/// `ordered`: ordered ⇒ RANGE UNBOUNDED PRECEDING AND CURRENT ROW,
8225/// unordered ⇒ ROWS UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING.
8226/// Single-bound shorthand (e.g. `ROWS 5 PRECEDING`) normalises
8227/// end → CURRENT ROW per the PG spec.
8228fn effective_frame(
8229    frame: Option<&WindowFrame>,
8230    ordered: bool,
8231) -> Result<(FrameKind, FrameBound, FrameBound), EngineError> {
8232    match frame {
8233        None => {
8234            if ordered {
8235                Ok((
8236                    FrameKind::Range,
8237                    FrameBound::UnboundedPreceding,
8238                    FrameBound::CurrentRow,
8239                ))
8240            } else {
8241                Ok((
8242                    FrameKind::Rows,
8243                    FrameBound::UnboundedPreceding,
8244                    FrameBound::UnboundedFollowing,
8245                ))
8246            }
8247        }
8248        Some(fr) => {
8249            let end = fr.end.clone().unwrap_or(FrameBound::CurrentRow);
8250            // Reject start > end (a few impossible combinations).
8251            if matches!(fr.start, FrameBound::UnboundedFollowing)
8252                || matches!(end, FrameBound::UnboundedPreceding)
8253            {
8254                return Err(EngineError::Unsupported(alloc::format!(
8255                    "invalid frame: start={:?} end={:?}",
8256                    fr.start,
8257                    end
8258                )));
8259            }
8260            // RANGE OFFSET PRECEDING / FOLLOWING needs value-typed
8261            // arithmetic on the ORDER BY key (e.g. `RANGE BETWEEN
8262            // INTERVAL '1 day' PRECEDING AND CURRENT ROW`). Not
8263            // implemented in v4.20.
8264            if fr.kind == FrameKind::Range
8265                && (matches!(
8266                    fr.start,
8267                    FrameBound::OffsetPreceding(_) | FrameBound::OffsetFollowing(_)
8268                ) || matches!(
8269                    end,
8270                    FrameBound::OffsetPreceding(_) | FrameBound::OffsetFollowing(_)
8271                ))
8272            {
8273                return Err(EngineError::Unsupported(
8274                    "RANGE with explicit offset bounds is not supported (v4.20: only UNBOUNDED / CURRENT ROW for RANGE)".into(),
8275                ));
8276            }
8277            Ok((fr.kind, fr.start.clone(), end))
8278        }
8279    }
8280}
8281
8282/// Compute `(lo, hi)` row-index bounds inside the partition slice
8283/// for the row at position `i`. Inclusive, clamped to
8284/// `[0, slice.len()-1]`. Empty result if `lo > hi`.
8285#[allow(clippy::type_complexity)]
8286fn frame_bounds_for_row(
8287    eff: &(FrameKind, FrameBound, FrameBound),
8288    i: usize,
8289    slice: &[(Vec<Value>, Vec<(Value, bool)>, usize)],
8290) -> (usize, usize) {
8291    let (kind, start, end) = eff;
8292    let n = slice.len();
8293    let last = n.saturating_sub(1);
8294    let (mut lo, mut hi) = match kind {
8295        FrameKind::Rows => {
8296            let lo = match start {
8297                FrameBound::UnboundedPreceding => 0,
8298                FrameBound::OffsetPreceding(k) => {
8299                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
8300                    i.saturating_sub(k)
8301                }
8302                FrameBound::CurrentRow => i,
8303                FrameBound::OffsetFollowing(k) => {
8304                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
8305                    i.saturating_add(k).min(last)
8306                }
8307                FrameBound::UnboundedFollowing => last,
8308            };
8309            let hi = match end {
8310                FrameBound::UnboundedPreceding => 0,
8311                FrameBound::OffsetPreceding(k) => {
8312                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
8313                    i.saturating_sub(k)
8314                }
8315                FrameBound::CurrentRow => i,
8316                FrameBound::OffsetFollowing(k) => {
8317                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
8318                    i.saturating_add(k).min(last)
8319                }
8320                FrameBound::UnboundedFollowing => last,
8321            };
8322            (lo, hi)
8323        }
8324        FrameKind::Range => {
8325            // RANGE bounds are peer-aware. With only UNBOUNDED and
8326            // CURRENT ROW supported (rejected at effective_frame for
8327            // explicit offsets), the start/end map to the
8328            // partition's full extent at the same-order-key peer
8329            // group boundary.
8330            let lo = match start {
8331                FrameBound::UnboundedPreceding => 0,
8332                FrameBound::CurrentRow => peer_group_start(slice, i),
8333                FrameBound::UnboundedFollowing => last,
8334                _ => unreachable!("offset bounds rejected for RANGE"),
8335            };
8336            let hi = match end {
8337                FrameBound::UnboundedPreceding => 0,
8338                FrameBound::CurrentRow => peer_group_end(slice, i),
8339                FrameBound::UnboundedFollowing => last,
8340                _ => unreachable!("offset bounds rejected for RANGE"),
8341            };
8342            (lo, hi)
8343        }
8344    };
8345    if hi >= n {
8346        hi = last;
8347    }
8348    if lo >= n {
8349        lo = last;
8350    }
8351    (lo, hi)
8352}
8353
8354/// Find the inclusive index of the first row with the same ORDER
8355/// BY key as `slice[i]`. Slice is already sorted by partition then
8356/// order, so peers are contiguous.
8357#[allow(clippy::type_complexity)]
8358fn peer_group_start(slice: &[(Vec<Value>, Vec<(Value, bool)>, usize)], i: usize) -> usize {
8359    let key = &slice[i].1;
8360    let mut j = i;
8361    while j > 0 && order_key_cmp(&slice[j - 1].1, key) == core::cmp::Ordering::Equal {
8362        j -= 1;
8363    }
8364    j
8365}
8366
8367/// Find the inclusive index of the last row with the same ORDER
8368/// BY key as `slice[i]`.
8369#[allow(clippy::type_complexity)]
8370fn peer_group_end(slice: &[(Vec<Value>, Vec<(Value, bool)>, usize)], i: usize) -> usize {
8371    let key = &slice[i].1;
8372    let mut j = i;
8373    while j + 1 < slice.len() && order_key_cmp(&slice[j + 1].1, key) == core::cmp::Ordering::Equal {
8374        j += 1;
8375    }
8376    j
8377}
8378
8379fn value_to_f64(v: &Value) -> Option<f64> {
8380    match v {
8381        Value::SmallInt(n) => Some(f64::from(*n)),
8382        Value::Int(n) => Some(f64::from(*n)),
8383        #[allow(clippy::cast_precision_loss)]
8384        Value::BigInt(n) => Some(*n as f64),
8385        Value::Float(x) => Some(*x),
8386        _ => None,
8387    }
8388}
8389
8390/// Quick scan for any subquery-bearing node in a SELECT's WHERE /
8391/// projection / `order_by` — saves cloning the AST when there are
8392/// none (the common case).
8393fn expr_tree_has_subquery(stmt: &SelectStatement) -> bool {
8394    let mut any = false;
8395    for item in &stmt.items {
8396        if let SelectItem::Expr { expr, .. } = item {
8397            any = any || expr_has_subquery(expr);
8398        }
8399    }
8400    if let Some(w) = &stmt.where_ {
8401        any = any || expr_has_subquery(w);
8402    }
8403    if let Some(h) = &stmt.having {
8404        any = any || expr_has_subquery(h);
8405    }
8406    for o in &stmt.order_by {
8407        any = any || expr_has_subquery(&o.expr);
8408    }
8409    for (_, peer) in &stmt.unions {
8410        any = any || expr_tree_has_subquery(peer);
8411    }
8412    any
8413}
8414
8415fn expr_has_subquery(e: &Expr) -> bool {
8416    match e {
8417        Expr::ScalarSubquery(_) | Expr::Exists { .. } | Expr::InSubquery { .. } => true,
8418        Expr::Binary { lhs, rhs, .. } => expr_has_subquery(lhs) || expr_has_subquery(rhs),
8419        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
8420            expr_has_subquery(expr)
8421        }
8422        Expr::FunctionCall { args, .. } => args.iter().any(expr_has_subquery),
8423        Expr::Like { expr, pattern, .. } => expr_has_subquery(expr) || expr_has_subquery(pattern),
8424        Expr::Extract { source, .. } => expr_has_subquery(source),
8425        Expr::WindowFunction {
8426            args,
8427            partition_by,
8428            order_by,
8429            ..
8430        } => {
8431            args.iter().any(expr_has_subquery)
8432                || partition_by.iter().any(expr_has_subquery)
8433                || order_by.iter().any(|(e, _)| expr_has_subquery(e))
8434        }
8435        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => false,
8436        Expr::Array(items) => items.iter().any(expr_has_subquery),
8437        Expr::ArraySubscript { target, index } => {
8438            expr_has_subquery(target) || expr_has_subquery(index)
8439        }
8440        Expr::AnyAll { expr, array, .. } => expr_has_subquery(expr) || expr_has_subquery(array),
8441        Expr::Case {
8442            operand,
8443            branches,
8444            else_branch,
8445        } => {
8446            operand.as_deref().is_some_and(expr_has_subquery)
8447                || branches
8448                    .iter()
8449                    .any(|(w, t)| expr_has_subquery(w) || expr_has_subquery(t))
8450                || else_branch.as_deref().is_some_and(expr_has_subquery)
8451        }
8452    }
8453}
8454
8455/// v4.10 helper: materialise a runtime `Value` back into an AST
8456/// `Expr::Literal` for the subquery-rewrite path. Supports the
8457/// types `Literal` can represent (Integer / Float / Text / Bool /
8458/// Null). Date / Timestamp / Numeric / Vector / Interval / JSON
8459/// would lose precision through Literal and aren't supported in
8460/// uncorrelated-subquery results; they error with a clear hint.
8461fn value_to_literal_expr(v: Value) -> Result<Expr, EngineError> {
8462    let lit = match v {
8463        Value::Null => Literal::Null,
8464        Value::SmallInt(n) => Literal::Integer(i64::from(n)),
8465        Value::Int(n) => Literal::Integer(i64::from(n)),
8466        Value::BigInt(n) => Literal::Integer(n),
8467        Value::Float(x) => Literal::Float(x),
8468        Value::Text(s) | Value::Json(s) => Literal::String(s),
8469        Value::Bool(b) => Literal::Bool(b),
8470        other => {
8471            return Err(EngineError::Unsupported(alloc::format!(
8472                "subquery result type {:?} not yet materialisable; cast to text or integer in the inner SELECT",
8473                other.data_type()
8474            )));
8475        }
8476    };
8477    Ok(Expr::Literal(lit))
8478}
8479
8480/// v7.13.0 — wider helper used by `INSERT … SELECT` (mailrs
8481/// round-5 G4). Covers the most common `Value` variants. Types
8482/// that need lossy textual round-trip (BYTEA, arrays, ts*)
8483/// surface as an Unsupported error so the caller can add a cast
8484/// in the inner SELECT.
8485fn value_to_literal_expr_permissive(v: Value) -> Result<Expr, EngineError> {
8486    let lit = match v {
8487        Value::Null => Literal::Null,
8488        Value::SmallInt(n) => Literal::Integer(i64::from(n)),
8489        Value::Int(n) => Literal::Integer(i64::from(n)),
8490        Value::BigInt(n) => Literal::Integer(n),
8491        Value::Float(x) => Literal::Float(x),
8492        Value::Text(s) | Value::Json(s) => Literal::String(s),
8493        Value::Bool(b) => Literal::Bool(b),
8494        Value::Vector(xs) => Literal::Vector(xs),
8495        // Date / Timestamp / Timestamptz / Numeric round-trip
8496        // through a TEXT literal that `coerce_value` re-parses
8497        // against the target column type.
8498        Value::Date(days) => {
8499            let micros = (i64::from(days)) * 86_400_000_000;
8500            Literal::String(format_timestamp_micros_as_date(micros))
8501        }
8502        Value::Timestamp(us) => Literal::String(format_timestamp_micros(us)),
8503        Value::Numeric { scaled, scale } => {
8504            Literal::String(format_numeric(scaled, scale))
8505        }
8506        other => {
8507            return Err(EngineError::Unsupported(alloc::format!(
8508                "INSERT … SELECT cannot materialise value of type {:?}; \
8509                 add an explicit CAST in the inner SELECT",
8510                other.data_type()
8511            )));
8512        }
8513    };
8514    Ok(Expr::Literal(lit))
8515}
8516
8517fn format_timestamp_micros(us: i64) -> String {
8518    // Same Y/M/D split used by the wire layer; epoch-relative.
8519    let days = us.div_euclid(86_400_000_000);
8520    let intra_day = us.rem_euclid(86_400_000_000);
8521    let date = format_timestamp_micros_as_date(days * 86_400_000_000);
8522    let secs = intra_day / 1_000_000;
8523    let us_rem = intra_day % 1_000_000;
8524    let h = (secs / 3600) % 24;
8525    let m = (secs / 60) % 60;
8526    let s = secs % 60;
8527    if us_rem == 0 {
8528        alloc::format!("{date} {h:02}:{m:02}:{s:02}")
8529    } else {
8530        alloc::format!("{date} {h:02}:{m:02}:{s:02}.{us_rem:06}")
8531    }
8532}
8533
8534fn format_timestamp_micros_as_date(us: i64) -> String {
8535    // Days since 1970-01-01 → calendar Y-M-D via the proleptic
8536    // Gregorian conversion used by spg-engine's date helpers.
8537    let days = us.div_euclid(86_400_000_000);
8538    // 1970-01-01 = JDN 2440588.
8539    let jdn = days + 2_440_588;
8540    let (y, mo, d) = jdn_to_ymd(jdn);
8541    alloc::format!("{y:04}-{mo:02}-{d:02}")
8542}
8543
/// Converts a Julian Day Number into `(year, month, day)` using the
/// Fliegel & Van Flandern (1968) integer algorithm.
///
/// The formula relies on truncating integer division and is only
/// specified for positive JDNs.
fn jdn_to_ymd(jdn: i64) -> (i64, u32, u32) {
    let shifted = jdn + 68569;
    // Number of whole 400-year cycles (146_097 days each).
    let cycles = (4 * shifted) / 146_097;
    let in_cycle = shifted - (146_097 * cycles + 3) / 4;
    // Year within the cycle.
    let year_in_cycle = (4000 * (in_cycle + 1)) / 1_461_001;
    let in_year = in_cycle - (1461 * year_in_cycle) / 4 + 31;
    let month_raw = (80 * in_year) / 2447;
    let day = (in_year - (2447 * month_raw) / 80) as u32;
    // 1 when the raw month spills over into the next calendar year.
    let carry = month_raw / 11;
    let month = (month_raw + 2 - 12 * carry) as u32;
    let year = 100 * (cycles - 49) + year_in_cycle + carry;
    (year, month, day)
}
8558
/// Renders a fixed-point number stored as `scaled * 10^-scale` in
/// plain decimal notation with exactly `scale` fractional digits.
///
/// `scale == 0` prints the integer as-is. Otherwise the absolute
/// value's digits are left-padded with zeros to at least `scale + 1`
/// characters and split `scale` digits from the right, so there is
/// always a whole part (e.g. `-5` at scale 3 gives `-0.005`).
///
/// Working on the digit string avoids computing `10^scale`, which
/// does not fit in `u128` once `scale` reaches 39.
fn format_numeric(scaled: i128, scale: u8) -> String {
    if scale == 0 {
        return alloc::format!("{scaled}");
    }
    let frac_len = usize::from(scale);
    let sign = if scaled < 0 { "-" } else { "" };
    let digits = alloc::format!(
        "{:0width$}",
        scaled.unsigned_abs(),
        width = frac_len + 1
    );
    // `digits` is pure ASCII and at least `frac_len + 1` bytes long,
    // so this split is always on a valid boundary.
    let (whole, frac) = digits.split_at(digits.len() - frac_len);
    alloc::format!("{sign}{whole}.{frac}")
}
8573
8574/// v6.1.1 — walk the prepared `Statement` AST and replace every
8575/// `Expr::Placeholder(n)` with `Expr::Literal(value_to_literal(
8576/// params[n-1]))`. The dispatch downstream sees a `Statement`
8577/// indistinguishable from a simple-query parse, so the exec path
8578/// stays unchanged.
8579///
8580/// Errors fall into one shape: a `$N` references past the bound
8581/// `params.len()`. Out-of-range happens when the Bind didn't
8582/// supply enough values; pgwire surfaces this as a protocol error
8583/// to the client.
8584fn substitute_placeholders(stmt: &mut Statement, params: &[Value]) -> Result<(), EngineError> {
8585    match stmt {
8586        Statement::Select(s) => substitute_select(s, params)?,
8587        Statement::Insert(ins) => {
8588            for row in &mut ins.rows {
8589                for e in row {
8590                    substitute_expr(e, params)?;
8591                }
8592            }
8593        }
8594        Statement::Update(u) => {
8595            for (_, e) in &mut u.assignments {
8596                substitute_expr(e, params)?;
8597            }
8598            if let Some(w) = &mut u.where_ {
8599                substitute_expr(w, params)?;
8600            }
8601        }
8602        Statement::Delete(d) => {
8603            if let Some(w) = &mut d.where_ {
8604                substitute_expr(w, params)?;
8605            }
8606        }
8607        Statement::Explain(e) => substitute_select(&mut e.inner, params)?,
8608        // Other statements (CREATE / BEGIN / SHOW / …) have no
8609        // expression slots; no walk needed.
8610        _ => {}
8611    }
8612    Ok(())
8613}
8614
8615fn substitute_select(s: &mut SelectStatement, params: &[Value]) -> Result<(), EngineError> {
8616    for item in &mut s.items {
8617        if let SelectItem::Expr { expr, .. } = item {
8618            substitute_expr(expr, params)?;
8619        }
8620    }
8621    if let Some(w) = &mut s.where_ {
8622        substitute_expr(w, params)?;
8623    }
8624    if let Some(gs) = &mut s.group_by {
8625        for g in gs {
8626            substitute_expr(g, params)?;
8627        }
8628    }
8629    if let Some(h) = &mut s.having {
8630        substitute_expr(h, params)?;
8631    }
8632    for o in &mut s.order_by {
8633        substitute_expr(&mut o.expr, params)?;
8634    }
8635    for (_, peer) in &mut s.unions {
8636        substitute_select(peer, params)?;
8637    }
8638    // v7.9.24 — LIMIT $N / OFFSET $N placeholder resolution.
8639    // mailrs H2. After this pass each LIMIT/OFFSET that was a
8640    // Placeholder is rewritten to Literal so the existing
8641    // `LimitExpr::as_literal` path consumes a concrete u32.
8642    if let Some(le) = s.limit {
8643        s.limit = Some(resolve_limit_placeholder(le, params)?);
8644    }
8645    if let Some(le) = s.offset {
8646        s.offset = Some(resolve_limit_placeholder(le, params)?);
8647    }
8648    Ok(())
8649}
8650
8651fn resolve_limit_placeholder(
8652    le: spg_sql::ast::LimitExpr,
8653    params: &[Value],
8654) -> Result<spg_sql::ast::LimitExpr, EngineError> {
8655    use spg_sql::ast::LimitExpr;
8656    match le {
8657        LimitExpr::Literal(_) => Ok(le),
8658        LimitExpr::Placeholder(n) => {
8659            let idx = usize::from(n).saturating_sub(1);
8660            let v = params.get(idx).ok_or_else(|| {
8661                EngineError::Eval(EvalError::PlaceholderOutOfRange {
8662                    n,
8663                    bound: u16::try_from(params.len()).unwrap_or(u16::MAX),
8664                })
8665            })?;
8666            let int = match v {
8667                Value::SmallInt(x) => Some(i64::from(*x)),
8668                Value::Int(x) => Some(i64::from(*x)),
8669                Value::BigInt(x) => Some(*x),
8670                _ => None,
8671            }
8672            .ok_or_else(|| {
8673                EngineError::Unsupported(alloc::format!(
8674                    "LIMIT/OFFSET ${n} bound to non-integer {v:?}"
8675                ))
8676            })?;
8677            if int < 0 {
8678                return Err(EngineError::Unsupported(alloc::format!(
8679                    "LIMIT/OFFSET ${n} bound to negative value {int}"
8680                )));
8681            }
8682            let bounded = u32::try_from(int).map_err(|_| {
8683                EngineError::Unsupported(alloc::format!(
8684                    "LIMIT/OFFSET ${n} value {int} exceeds u32 range"
8685                ))
8686            })?;
8687            Ok(LimitExpr::Literal(bounded))
8688        }
8689    }
8690}
8691
8692fn substitute_expr(e: &mut Expr, params: &[Value]) -> Result<(), EngineError> {
8693    if let Expr::Placeholder(n) = e {
8694        let idx = usize::from(*n).saturating_sub(1);
8695        let v = params.get(idx).ok_or_else(|| {
8696            EngineError::Eval(EvalError::PlaceholderOutOfRange {
8697                n: *n,
8698                bound: u16::try_from(params.len()).unwrap_or(u16::MAX),
8699            })
8700        })?;
8701        *e = Expr::Literal(value_to_literal(v.clone()));
8702        return Ok(());
8703    }
8704    match e {
8705        Expr::Binary { lhs, rhs, .. } => {
8706            substitute_expr(lhs, params)?;
8707            substitute_expr(rhs, params)?;
8708        }
8709        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
8710            substitute_expr(expr, params)?;
8711        }
8712        Expr::FunctionCall { args, .. } => {
8713            for a in args {
8714                substitute_expr(a, params)?;
8715            }
8716        }
8717        Expr::Like { expr, pattern, .. } => {
8718            substitute_expr(expr, params)?;
8719            substitute_expr(pattern, params)?;
8720        }
8721        Expr::Extract { source, .. } => substitute_expr(source, params)?,
8722        Expr::ScalarSubquery(s) => substitute_select(s, params)?,
8723        Expr::Exists { subquery, .. } => substitute_select(subquery, params)?,
8724        Expr::InSubquery { expr, subquery, .. } => {
8725            substitute_expr(expr, params)?;
8726            substitute_select(subquery, params)?;
8727        }
8728        Expr::WindowFunction {
8729            args,
8730            partition_by,
8731            order_by,
8732            ..
8733        } => {
8734            for a in args {
8735                substitute_expr(a, params)?;
8736            }
8737            for p in partition_by {
8738                substitute_expr(p, params)?;
8739            }
8740            for (e, _) in order_by {
8741                substitute_expr(e, params)?;
8742            }
8743        }
8744        Expr::Literal(_) | Expr::Column(_) => {}
8745        // Already handled above.
8746        Expr::Placeholder(_) => unreachable!("Placeholder handled at top of fn"),
8747        Expr::Array(items) => {
8748            for elem in items {
8749                substitute_expr(elem, params)?;
8750            }
8751        }
8752        Expr::ArraySubscript { target, index } => {
8753            substitute_expr(target, params)?;
8754            substitute_expr(index, params)?;
8755        }
8756        Expr::AnyAll { expr, array, .. } => {
8757            substitute_expr(expr, params)?;
8758            substitute_expr(array, params)?;
8759        }
8760        Expr::Case {
8761            operand,
8762            branches,
8763            else_branch,
8764        } => {
8765            if let Some(o) = operand {
8766                substitute_expr(o, params)?;
8767            }
8768            for (w, t) in branches {
8769                substitute_expr(w, params)?;
8770                substitute_expr(t, params)?;
8771            }
8772            if let Some(e) = else_branch {
8773                substitute_expr(e, params)?;
8774            }
8775        }
8776    }
8777    Ok(())
8778}
8779
8780/// v6.1.1 — convert a runtime `Value` into the closest matching
8781/// `Literal` for the substitute walker. Lossless for the simple
8782/// scalars (Int / Float / Text / Bool); Numeric / Date / Timestamp
8783/// / Json / Interval render as their canonical text form so the
8784/// downstream coerce_value can re-parse against the target column
8785/// type. SQ8 / HalfVector cells are NOT expected as bind params;
8786/// pgwire's Bind decodes vector params to the f32 representation
8787/// before they reach this helper.
8788/// v6.2.0 — total ordering on `Value`s used by ANALYZE to sort a
8789/// column's non-NULL sample before histogram building. Cross-type
8790/// pairs (Int vs Float, Date vs Timestamp, …) compare via the
8791/// same widening the eval-side `compare` operator uses; everything
8792/// else (the genuinely-incompatible pairs) falls back to ordering
8793/// by canonical string form so the sort is still total + stable.
8794/// Vector / SQ8 / Half / Json / Numeric / Interval values reach
8795/// here only via the string-fallback path because vector columns
8796/// are filtered out upstream.
8797fn sort_values_for_histogram(a: &Value, b: &Value) -> core::cmp::Ordering {
8798    use core::cmp::Ordering;
8799    match (a, b) {
8800        (Value::SmallInt(a), Value::SmallInt(b)) => a.cmp(b),
8801        (Value::Int(a), Value::Int(b)) => a.cmp(b),
8802        (Value::BigInt(a), Value::BigInt(b)) => a.cmp(b),
8803        (Value::SmallInt(a), Value::Int(b)) => i32::from(*a).cmp(b),
8804        (Value::Int(a), Value::SmallInt(b)) => a.cmp(&i32::from(*b)),
8805        (Value::Int(a), Value::BigInt(b)) => i64::from(*a).cmp(b),
8806        (Value::BigInt(a), Value::Int(b)) => a.cmp(&i64::from(*b)),
8807        (Value::SmallInt(a), Value::BigInt(b)) => i64::from(*a).cmp(b),
8808        (Value::BigInt(a), Value::SmallInt(b)) => a.cmp(&i64::from(*b)),
8809        (Value::Float(a), Value::Float(b)) => a.partial_cmp(b).unwrap_or(Ordering::Equal),
8810        (Value::Text(a), Value::Text(b)) | (Value::Json(a), Value::Json(b)) => a.cmp(b),
8811        (Value::Bool(a), Value::Bool(b)) => a.cmp(b),
8812        (Value::Date(a), Value::Date(b)) => a.cmp(b),
8813        (Value::Timestamp(a), Value::Timestamp(b)) => a.cmp(b),
8814        // Mixed numeric/float — widen to f64 and compare.
8815        (Value::SmallInt(n), Value::Float(x)) => {
8816            (f64::from(*n)).partial_cmp(x).unwrap_or(Ordering::Equal)
8817        }
8818        (Value::Float(x), Value::SmallInt(n)) => {
8819            x.partial_cmp(&f64::from(*n)).unwrap_or(Ordering::Equal)
8820        }
8821        (Value::Int(n), Value::Float(x)) => {
8822            (f64::from(*n)).partial_cmp(x).unwrap_or(Ordering::Equal)
8823        }
8824        (Value::Float(x), Value::Int(n)) => {
8825            x.partial_cmp(&f64::from(*n)).unwrap_or(Ordering::Equal)
8826        }
8827        (Value::BigInt(n), Value::Float(x)) => {
8828            #[allow(clippy::cast_precision_loss)]
8829            let nf = *n as f64;
8830            nf.partial_cmp(x).unwrap_or(Ordering::Equal)
8831        }
8832        (Value::Float(x), Value::BigInt(n)) => {
8833            #[allow(clippy::cast_precision_loss)]
8834            let nf = *n as f64;
8835            x.partial_cmp(&nf).unwrap_or(Ordering::Equal)
8836        }
8837        // Cross-type fallback: lexicographic on canonical form.
8838        // Total + stable so the sort is well-defined.
8839        _ => canonical_value_repr(a).cmp(&canonical_value_repr(b)),
8840    }
8841}
8842
/// v6.2.0 — render the histogram bounds as a `[v0, v1, ...]` string
/// for the `spg_statistic.histogram_bounds` column.
///
/// Entries are separated by `", "`. An entry that is empty, or that
/// contains `,`, `[`, `]` or `"`, is wrapped in double quotes with
/// every `"` and `\` inside it backslash-escaped. All other entries
/// are emitted verbatim.
fn render_histogram_bounds(bounds: &[alloc::string::String]) -> alloc::string::String {
    let mut rendered = alloc::string::String::with_capacity(bounds.len() * 8 + 2);
    rendered.push('[');
    // Empty before the first entry, ", " before every later one.
    let mut separator = "";
    for bound in bounds {
        rendered.push_str(separator);
        separator = ", ";

        let must_quote =
            bound.is_empty() || bound.chars().any(|c| matches!(c, ',' | '[' | ']' | '"'));
        if !must_quote {
            rendered.push_str(bound);
            continue;
        }

        rendered.push('"');
        for c in bound.chars() {
            if matches!(c, '"' | '\\') {
                rendered.push('\\');
            }
            rendered.push(c);
        }
        rendered.push('"');
    }
    rendered.push(']');
    rendered
}
8873
8874/// v6.2.0 — canonical textual form of a `Value` for histogram
8875/// bound storage. Strings used by ANALYZE for sort + bound output.
8876/// INT / BIGINT → decimal; FLOAT → shortest-round-trip via
8877/// `{:?}`; TEXT pass-through; BOOL → `t` / `f`; DATE / TIMESTAMP →
8878/// the same form `format_date` / `format_timestamp` produce for
8879/// SQL Display. Vector / SQ8 / Half / Json / Numeric / Interval
8880/// reach this only via a non-Vector column (vector columns are
8881/// skipped upstream); they fall back to a Debug-derived form so
8882/// stats still serialise without crashing.
8883pub(crate) fn canonical_value_repr(v: &Value) -> alloc::string::String {
8884    match v {
8885        Value::Null => "NULL".to_string(),
8886        Value::SmallInt(n) => alloc::format!("{n}"),
8887        Value::Int(n) => alloc::format!("{n}"),
8888        Value::BigInt(n) => alloc::format!("{n}"),
8889        Value::Float(x) => alloc::format!("{x:?}"),
8890        Value::Text(s) | Value::Json(s) => s.clone(),
8891        Value::Bool(b) => if *b { "t" } else { "f" }.to_string(),
8892        Value::Date(d) => eval::format_date(*d),
8893        Value::Timestamp(t) => eval::format_timestamp(*t),
8894        Value::Interval { months, micros } => eval::format_interval(*months, *micros),
8895        Value::Numeric { scaled, scale } => eval::format_numeric(*scaled, *scale),
8896        Value::Vector(_) | Value::Sq8Vector(_) | Value::HalfVector(_) => {
8897            // Unreachable in practice (vector columns are filtered
8898            // out before this). Defensive fallback so a future
8899            // vector-stats path doesn't crash.
8900            alloc::format!("{v:?}")
8901        }
8902        // v7.5.0 — Value is #[non_exhaustive] for downstream
8903        // forward-compat. Future variants fall through to Debug
8904        // form here (same shape as the vector fallback above).
8905        _ => alloc::format!("{v:?}"),
8906    }
8907}
8908
/// v6.2.0 — reports whether `_name` is an engine-managed catalog
/// table that a bare `ANALYZE` (no target) should skip.
///
/// Currently a reserved hook: publications / subscriptions / users /
/// statistics live as engine fields rather than catalog tables, so
/// there is nothing to exclude and every table is analysed.
const fn is_internal_table_name(_name: &str) -> bool {
    // No internal catalog tables exist yet.
    false
}
8918
8919fn value_to_literal(v: Value) -> Literal {
8920    match v {
8921        Value::Null => Literal::Null,
8922        Value::SmallInt(n) => Literal::Integer(i64::from(n)),
8923        Value::Int(n) => Literal::Integer(i64::from(n)),
8924        Value::BigInt(n) => Literal::Integer(n),
8925        Value::Float(x) => Literal::Float(x),
8926        Value::Text(s) | Value::Json(s) => Literal::String(s),
8927        Value::Bool(b) => Literal::Bool(b),
8928        Value::Vector(v) => Literal::Vector(v),
8929        Value::Numeric { scaled, scale } => Literal::String(eval::format_numeric(scaled, scale)),
8930        Value::Date(d) => Literal::String(eval::format_date(d)),
8931        Value::Timestamp(t) => Literal::String(eval::format_timestamp(t)),
8932        Value::Interval { months, micros } => Literal::Interval {
8933            months,
8934            micros,
8935            text: eval::format_interval(months, micros),
8936        },
8937        // SQ8 / halfvec cells dequantise to f32 before reaching the
8938        // substitute walker; pgwire's Bind path handles that.
8939        Value::Sq8Vector(q) => Literal::Vector(spg_storage::quantize::dequantize(&q)),
8940        Value::HalfVector(h) => Literal::Vector(h.to_f32_vec()),
8941        // v7.5.0 — Value is #[non_exhaustive]; future variants
8942        // render as Debug-form String literal until explicit
8943        // mapping is added.
8944        v => Literal::String(alloc::format!("{v:?}")),
8945    }
8946}
8947
8948fn rewrite_clock_calls(stmt: &mut Statement, now_micros: Option<i64>) {
8949    let Some(now) = now_micros else {
8950        return;
8951    };
8952    match stmt {
8953        Statement::Select(s) => rewrite_select_clock(s, now),
8954        Statement::Insert(ins) => {
8955            for row in &mut ins.rows {
8956                for e in row {
8957                    rewrite_expr_clock(e, now);
8958                }
8959            }
8960        }
8961        _ => {}
8962    }
8963}
8964
8965fn rewrite_select_clock(s: &mut SelectStatement, now: i64) {
8966    for item in &mut s.items {
8967        if let SelectItem::Expr { expr, .. } = item {
8968            rewrite_expr_clock(expr, now);
8969        }
8970    }
8971    if let Some(w) = &mut s.where_ {
8972        rewrite_expr_clock(w, now);
8973    }
8974    if let Some(gs) = &mut s.group_by {
8975        for g in gs {
8976            rewrite_expr_clock(g, now);
8977        }
8978    }
8979    if let Some(h) = &mut s.having {
8980        rewrite_expr_clock(h, now);
8981    }
8982    for o in &mut s.order_by {
8983        rewrite_expr_clock(&mut o.expr, now);
8984    }
8985    for (_, peer) in &mut s.unions {
8986        rewrite_select_clock(peer, now);
8987    }
8988}
8989
8990/// v3.0.3 hot path: every recursion lands in exactly one `match` arm.
8991/// Literal / Column-with-qualifier (the dominant cases on a typical
8992/// AST) take a single pattern dispatch and exit. The clock-rewrite
8993/// targets (zero-arg `NOW` / `CURRENT_TIMESTAMP` / `CURRENT_DATE`
8994/// functions, and bare `CURRENT_TIMESTAMP` / `CURRENT_DATE` column
8995/// refs) sit on their own arms with match guards so the fall-through
8996/// to the recursive arms is unambiguous.
8997fn rewrite_expr_clock(e: &mut Expr, now: i64) {
8998    // Fast-path test on the no-recursion shapes first. We can't fold
8999    // them into the big match below because they need to *replace* `e`
9000    // outright; the recursive arms below match on its sub-fields.
9001    if let Some(replacement) = clock_replacement_for(e, now) {
9002        *e = replacement;
9003        return;
9004    }
9005    match e {
9006        Expr::Binary { lhs, rhs, .. } => {
9007            rewrite_expr_clock(lhs, now);
9008            rewrite_expr_clock(rhs, now);
9009        }
9010        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
9011            rewrite_expr_clock(expr, now);
9012        }
9013        Expr::FunctionCall { args, .. } => {
9014            for a in args {
9015                rewrite_expr_clock(a, now);
9016            }
9017        }
9018        Expr::Like { expr, pattern, .. } => {
9019            rewrite_expr_clock(expr, now);
9020            rewrite_expr_clock(pattern, now);
9021        }
9022        Expr::Extract { source, .. } => rewrite_expr_clock(source, now),
9023        // v4.10 subquery nodes — recurse into the inner SELECT's
9024        // expression slots so e.g. SELECT NOW() in a scalar
9025        // subquery picks up the same instant as the outer query.
9026        Expr::ScalarSubquery(s) => rewrite_select_clock(s, now),
9027        Expr::Exists { subquery, .. } => rewrite_select_clock(subquery, now),
9028        Expr::InSubquery { expr, subquery, .. } => {
9029            rewrite_expr_clock(expr, now);
9030            rewrite_select_clock(subquery, now);
9031        }
9032        // v4.12 window functions — args + PARTITION BY + ORDER BY
9033        // may all reference clock literals.
9034        Expr::WindowFunction {
9035            args,
9036            partition_by,
9037            order_by,
9038            ..
9039        } => {
9040            for a in args {
9041                rewrite_expr_clock(a, now);
9042            }
9043            for p in partition_by {
9044                rewrite_expr_clock(p, now);
9045            }
9046            for (e, _) in order_by {
9047                rewrite_expr_clock(e, now);
9048            }
9049        }
9050        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => {}
9051        Expr::Array(items) => {
9052            for elem in items {
9053                rewrite_expr_clock(elem, now);
9054            }
9055        }
9056        Expr::ArraySubscript { target, index } => {
9057            rewrite_expr_clock(target, now);
9058            rewrite_expr_clock(index, now);
9059        }
9060        Expr::AnyAll { expr, array, .. } => {
9061            rewrite_expr_clock(expr, now);
9062            rewrite_expr_clock(array, now);
9063        }
9064        Expr::Case {
9065            operand,
9066            branches,
9067            else_branch,
9068        } => {
9069            if let Some(o) = operand {
9070                rewrite_expr_clock(o, now);
9071            }
9072            for (w, t) in branches {
9073                rewrite_expr_clock(w, now);
9074                rewrite_expr_clock(t, now);
9075            }
9076            if let Some(e) = else_branch {
9077                rewrite_expr_clock(e, now);
9078            }
9079        }
9080    }
9081}
9082
9083/// Returns `Some(Expr)` when `e` is one of the clock-call shapes that
9084/// must be rewritten; otherwise `None` so the caller falls through to
9085/// the recursive walk. Identifies both function-call forms (`NOW()` /
9086/// `CURRENT_TIMESTAMP()` / `CURRENT_DATE()`) and bare-identifier forms
9087/// (`CURRENT_TIMESTAMP` / `CURRENT_DATE` as unqualified column refs,
9088/// which is how PG accepts them without parens).
9089fn clock_replacement_for(e: &Expr, now: i64) -> Option<Expr> {
9090    let (kind, name) = match e {
9091        Expr::FunctionCall { name, args } if args.is_empty() => (ClockSite::Fn, name.as_str()),
9092        Expr::Column(c) if c.qualifier.is_none() => (ClockSite::BareIdent, c.name.as_str()),
9093        _ => return None,
9094    };
9095    // ASCII case-insensitive name match. Limited to the three keywords
9096    // that actually need rewriting.
9097    let matched = match name.len() {
9098        3 if kind == ClockSite::Fn && name.eq_ignore_ascii_case("now") => Some(true),
9099        12 if name.eq_ignore_ascii_case("current_date") => Some(false),
9100        17 if name.eq_ignore_ascii_case("current_timestamp") => Some(true),
9101        _ => None,
9102    };
9103    let is_timestamp = matched?;
9104    let payload = if is_timestamp {
9105        now
9106    } else {
9107        now.div_euclid(86_400_000_000)
9108    };
9109    let target = if is_timestamp {
9110        spg_sql::ast::CastTarget::Timestamp
9111    } else {
9112        spg_sql::ast::CastTarget::Date
9113    };
9114    Some(Expr::Cast {
9115        expr: alloc::boxed::Box::new(Expr::Literal(spg_sql::ast::Literal::Integer(payload))),
9116        target,
9117    })
9118}
9119
/// Syntactic position in which a candidate clock keyword was found.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum ClockSite {
    /// Zero-argument function call, e.g. `NOW()`.
    Fn,
    /// Unqualified identifier without parentheses, e.g. `CURRENT_DATE`.
    BareIdent,
}
9125
9126/// `ORDER BY <integer>` references the N-th SELECT item (1-based).
9127/// Swap the integer literal for the matching item's expression so the
9128/// executor doesn't need a special-case branch. Recurses into UNION
9129/// peers because each peer keeps its own SELECT list.
9130/// v6.4.1 — expand `GROUP BY ALL` to every non-aggregate SELECT-list
9131/// item. Mirrors DuckDB / PG 19 semantics. Wildcards (`SELECT * …`)
9132/// are NOT expanded by GROUP BY ALL (PG 19 leaves the wildcard intact
9133/// and groups by whatever explicit non-aggregates remain — none in
9134/// the wildcard-only case, which still works for non-aggregate
9135/// queries).
9136fn expand_group_by_all(s: &mut SelectStatement) {
9137    if !s.group_by_all {
9138        for (_, peer) in &mut s.unions {
9139            expand_group_by_all(peer);
9140        }
9141        return;
9142    }
9143    let mut groups: Vec<Expr> = Vec::new();
9144    for item in &s.items {
9145        if let SelectItem::Expr { expr, .. } = item
9146            && !aggregate::contains_aggregate(expr)
9147        {
9148            groups.push(expr.clone());
9149        }
9150    }
9151    s.group_by = Some(groups);
9152    s.group_by_all = false;
9153    for (_, peer) in &mut s.unions {
9154        expand_group_by_all(peer);
9155    }
9156}
9157
9158fn resolve_order_by_position(s: &mut SelectStatement) {
9159    // v6.4.0 — iterate every ORDER BY key. Position references
9160    // (`ORDER BY 2`) bind to the 1-based projection index;
9161    // identifier references that match a SELECT-list alias bind to
9162    // the projected expression (Step 4 of L3a).
9163    for order in &mut s.order_by {
9164        match &order.expr {
9165            Expr::Literal(Literal::Integer(n)) if *n >= 1 => {
9166                if let Ok(idx_one_based) = usize::try_from(*n) {
9167                    let idx = idx_one_based - 1;
9168                    if idx < s.items.len()
9169                        && let SelectItem::Expr { expr, .. } = &s.items[idx]
9170                    {
9171                        order.expr = expr.clone();
9172                    }
9173                }
9174            }
9175            Expr::Column(c) if c.qualifier.is_none() => {
9176                // Alias-in-ORDER-BY lookup.
9177                for item in &s.items {
9178                    if let SelectItem::Expr {
9179                        expr,
9180                        alias: Some(a),
9181                    } = item
9182                        && a == &c.name
9183                    {
9184                        order.expr = expr.clone();
9185                        break;
9186                    }
9187                }
9188            }
9189            _ => {}
9190        }
9191    }
9192    for (_, peer) in &mut s.unions {
9193        resolve_order_by_position(peer);
9194    }
9195}
9196
9197/// Sort `tagged` by `f64` key, reversing the comparator under DESC.
9198/// Used by the UNION ORDER BY path; per-block paths inline the same
9199/// comparator because they already hold `&OrderBy` directly.
9200/// v3.1.1: partial-sort helper. When `keep` (= offset + limit) is
9201/// strictly less than `tagged.len()`, run `select_nth_unstable_by` to
9202/// partition the prefix in O(n), then sort just that prefix in O(k
9203/// log k). Total O(n + k log k), vs O(n log n) for a full sort. The
9204/// caller decides what `keep` is; passing `None` (no LIMIT) keeps the
9205/// full-sort behaviour.
9206///
9207/// `tagged` holds `(Option<f64>, Row)` (the SELECT path) — `None` keys
9208/// sort last in ascending order, mirroring NULL-sorts-last in SQL.
9209fn partial_sort_tagged(tagged: &mut Vec<(Vec<f64>, Row)>, keep: Option<usize>, descs: &[bool]) {
9210    let cmp = |a: &(Vec<f64>, Row), b: &(Vec<f64>, Row)| cmp_multi_key(&a.0, &b.0, descs);
9211    match keep {
9212        Some(k) if k < tagged.len() && k > 0 => {
9213            let pivot = k - 1;
9214            tagged.select_nth_unstable_by(pivot, cmp);
9215            tagged[..k].sort_by(cmp);
9216            tagged.truncate(k);
9217        }
9218        _ => {
9219            tagged.sort_by(cmp);
9220        }
9221    }
9222}
9223
9224fn sort_by_keys(tagged: &mut [(Vec<f64>, Row)], descs: &[bool]) {
9225    tagged.sort_by(|a, b| cmp_multi_key(&a.0, &b.0, descs));
9226}
9227
/// v6.4.0 — multi-key ORDER BY comparator.
///
/// Keys are compared pairwise, left to right; the first non-equal
/// pair decides. Each key's DESC flag (`descs[i]`, treated as `false`
/// when missing) reverses only that key's ordering. If the slices
/// differ in length, only the common prefix is compared.
///
/// NULL is encoded by the caller as `f64::INFINITY`, so it sorts last
/// in ASC and first in DESC.
///
/// NaN handling: `partial_cmp` returns `None` when either side is NaN.
/// Mapping that to `Equal` would make NaN "equal" to every value while
/// those values still differ from one another. That is not a total
/// order, and `sort_by` / `select_nth_unstable_by` may then produce an
/// unspecified order or panic. NaN is therefore ordered explicitly:
/// equal to another NaN and greater than every non-NaN key, which
/// matches PostgreSQL's treatment of NaN.
fn cmp_multi_key(a: &[f64], b: &[f64], descs: &[bool]) -> core::cmp::Ordering {
    use core::cmp::Ordering;
    for (i, (ka, kb)) in a.iter().zip(b.iter()).enumerate() {
        let ord = ka
            .partial_cmp(kb)
            // Only reached when at least one side is NaN:
            // `false < true`, so the NaN side compares greater.
            .unwrap_or_else(|| ka.is_nan().cmp(&kb.is_nan()));
        let ord = if descs.get(i).copied().unwrap_or(false) {
            ord.reverse()
        } else {
            ord
        };
        if ord != Ordering::Equal {
            return ord;
        }
    }
    Ordering::Equal
}
9246
9247/// v6.4.0 — eval every ORDER BY expression for a row and pack the
9248/// resulting keys into a `Vec<f64>`. NULL → `f64::INFINITY`.
9249fn build_order_keys(
9250    order_by: &[OrderBy],
9251    row: &Row,
9252    ctx: &EvalContext,
9253) -> Result<Vec<f64>, EngineError> {
9254    let mut keys = Vec::with_capacity(order_by.len());
9255    for o in order_by {
9256        let v = eval::eval_expr(&o.expr, row, ctx)?;
9257        keys.push(value_to_order_key(&v)?);
9258    }
9259    Ok(keys)
9260}
9261
9262/// Drop the first `offset` rows then truncate to `limit`. PG / `MySQL`
9263/// agree: OFFSET applies *after* ORDER BY but *before* LIMIT (so
9264/// `LIMIT 10 OFFSET 5` keeps rows 6..=15).
9265fn apply_offset_and_limit(rows: &mut Vec<Row>, offset: Option<u32>, limit: Option<u32>) {
9266    if let Some(off) = offset {
9267        let off = off as usize;
9268        if off >= rows.len() {
9269            rows.clear();
9270        } else {
9271            rows.drain(..off);
9272        }
9273    }
9274    if let Some(n) = limit {
9275        rows.truncate(n as usize);
9276    }
9277}
9278
9279/// v7.6.1 — resolve a parser-level `ForeignKeyConstraint` (column
9280/// names + parent table name) into the storage-layer shape (column
9281/// indices + same parent table). Validates everything the engine
9282/// needs to know about the FK at CREATE TABLE time:
9283///
9284///   - parent table exists (catalog lookup, unless self-referencing)
9285///   - parent columns exist on the parent table
9286///   - parent column list matches the local arity (defaults to the
9287///     parent's primary index column when omitted)
9288///   - parent columns are covered by a `BTree` UNIQUE-class index
9289///     (SPG's stand-in for `PRIMARY KEY`/`UNIQUE`) — required so
9290///     the v7.6.2 INSERT path can do an O(log n) parent lookup
9291///   - local columns exist on the table being created
9292fn resolve_foreign_key(
9293    local_table_name: &str,
9294    local_cols: &[ColumnSchema],
9295    fk: spg_sql::ast::ForeignKeyConstraint,
9296    catalog: &Catalog,
9297) -> Result<spg_storage::ForeignKeyConstraint, EngineError> {
9298    // Resolve local columns.
9299    let mut local_columns = Vec::with_capacity(fk.columns.len());
9300    for name in &fk.columns {
9301        let pos = local_cols
9302            .iter()
9303            .position(|c| c.name == *name)
9304            .ok_or_else(|| {
9305                EngineError::Unsupported(alloc::format!(
9306                    "FOREIGN KEY references unknown local column {name:?}"
9307                ))
9308            })?;
9309        local_columns.push(pos);
9310    }
9311    // Self-referencing FK: parent table is the one we're creating.
9312    // The parent column resolution uses the local column list since
9313    // the catalog doesn't have this table yet.
9314    let is_self_ref = fk.parent_table == local_table_name;
9315    let (parent_cols_for_lookup, parent_table_str): (&[ColumnSchema], &str) = if is_self_ref {
9316        (local_cols, local_table_name)
9317    } else {
9318        let parent_table = catalog.get(&fk.parent_table).ok_or_else(|| {
9319            EngineError::Storage(StorageError::TableNotFound {
9320                name: fk.parent_table.clone(),
9321            })
9322        })?;
9323        (
9324            parent_table.schema().columns.as_slice(),
9325            fk.parent_table.as_str(),
9326        )
9327    };
9328    // Resolve parent column names → positions. If the FK omitted the
9329    // parent column list, fall back to the parent's primary index
9330    // column (single-column only — composite default is rejected
9331    // because there's no unambiguous "PK" in SPG's index list).
9332    let parent_columns: Vec<usize> = if fk.parent_columns.is_empty() {
9333        if fk.columns.len() != 1 {
9334            return Err(EngineError::Unsupported(
9335                "composite FOREIGN KEY without explicit parent column list is not supported \
9336                 — list the parent columns explicitly"
9337                    .into(),
9338            ));
9339        }
9340        // Find a single BTree index on the parent and use its column.
9341        let pos = pick_pk_index_column(catalog, parent_table_str, is_self_ref, local_cols)
9342            .ok_or_else(|| {
9343                EngineError::Unsupported(alloc::format!(
9344                    "parent table {parent_table_str:?} has no PRIMARY-key / UNIQUE BTree index \
9345                     to default the FOREIGN KEY against"
9346                ))
9347            })?;
9348        alloc::vec![pos]
9349    } else {
9350        let mut out = Vec::with_capacity(fk.parent_columns.len());
9351        for name in &fk.parent_columns {
9352            let pos = parent_cols_for_lookup
9353                .iter()
9354                .position(|c| c.name == *name)
9355                .ok_or_else(|| {
9356                    EngineError::Unsupported(alloc::format!(
9357                        "FOREIGN KEY references unknown parent column \
9358                         {name:?} on table {parent_table_str:?}"
9359                    ))
9360                })?;
9361            out.push(pos);
9362        }
9363        out
9364    };
9365    if parent_columns.len() != local_columns.len() {
9366        return Err(EngineError::Unsupported(alloc::format!(
9367            "FOREIGN KEY arity mismatch: {} local columns vs {} parent columns",
9368            local_columns.len(),
9369            parent_columns.len()
9370        )));
9371    }
9372    // For non-self-referencing FKs, verify the parent column set is
9373    // covered by a BTree index. SPG doesn't have a `PRIMARY KEY`
9374    // declaration; the convention is "the parent column for FK
9375    // purposes must have a BTree index" — which the user creates via
9376    // `CREATE INDEX ... USING btree (col)` (the default). We accept
9377    // any single-column BTree index that covers a parent column;
9378    // composite parent column lists require an index whose `column_position`
9379    // matches the first parent column (multi-column BTree indices
9380    // are not in the v7.x roadmap).
9381    if !is_self_ref {
9382        let parent_table = catalog.get(&fk.parent_table).expect("checked above");
9383        let primary_parent_col = parent_columns[0];
9384        let has_btree = parent_table
9385            .schema()
9386            .columns
9387            .get(primary_parent_col)
9388            .is_some()
9389            && parent_table.indices().iter().any(|idx| {
9390                matches!(idx.kind, spg_storage::IndexKind::BTree(_))
9391                    && idx.column_position == primary_parent_col
9392                    && idx.partial_predicate.is_none()
9393            });
9394        if !has_btree {
9395            return Err(EngineError::Unsupported(alloc::format!(
9396                "FOREIGN KEY parent column on {:?} is not covered by an unconditional BTree \
9397                 index — create one with `CREATE INDEX ... ON {} ({})` first",
9398                parent_table_str,
9399                parent_table_str,
9400                parent_table.schema().columns[primary_parent_col].name,
9401            )));
9402        }
9403    }
9404    let on_delete = fk_action_sql_to_storage(fk.on_delete);
9405    let on_update = fk_action_sql_to_storage(fk.on_update);
9406    Ok(spg_storage::ForeignKeyConstraint {
9407        name: fk.name,
9408        local_columns,
9409        parent_table: fk.parent_table,
9410        parent_columns,
9411        on_delete,
9412        on_update,
9413    })
9414}
9415
9416/// v7.6.1 — pick a sentinel "primary key" column from the parent
9417/// table when the FK didn't name parent columns. Picks the first
9418/// single-column unconditional BTree index — that's the closest
9419/// thing SPG has to a PRIMARY KEY today. Self-referencing FKs use
9420/// `local_cols` as the column source.
9421fn pick_pk_index_column(
9422    catalog: &Catalog,
9423    parent_name: &str,
9424    is_self_ref: bool,
9425    local_cols: &[ColumnSchema],
9426) -> Option<usize> {
9427    if is_self_ref {
9428        // Self-ref FK omitted parent columns: pick column 0 by
9429        // convention (no catalog entry yet). Engine will widen this
9430        // when v7.6.7 lands; v7.6.1 only handles the explicit form.
9431        let _ = local_cols;
9432        return Some(0);
9433    }
9434    let parent = catalog.get(parent_name)?;
9435    parent.indices().iter().find_map(|idx| {
9436        if matches!(idx.kind, spg_storage::IndexKind::BTree(_))
9437            && idx.partial_predicate.is_none()
9438            && idx.included_columns.is_empty()
9439            && idx.expression.is_none()
9440        {
9441            Some(idx.column_position)
9442        } else {
9443            None
9444        }
9445    })
9446}
9447
9448/// v7.9.8 / v7.9.10 — resolve the column positions that
9449/// identify a conflict for ON CONFLICT. Returns a Vec of
9450/// column positions (1 element for single-column form, N for
9451/// composite). When the user wrote bare `ON CONFLICT DO …`,
9452/// falls back to the table's first unconditional BTree index
9453/// (always single-column today).
9454fn resolve_on_conflict_columns(
9455    catalog: &Catalog,
9456    table_name: &str,
9457    target: &[String],
9458) -> Result<Vec<usize>, EngineError> {
9459    let table = catalog.get(table_name).ok_or_else(|| {
9460        EngineError::Storage(StorageError::TableNotFound {
9461            name: table_name.into(),
9462        })
9463    })?;
9464    if target.is_empty() {
9465        // v7.13.2 — mailrs round-6 S5 follow-up. Composite UNIQUE
9466        // constraints carry a multi-column tuple; the prior code
9467        // path picked only the leading column of the first BTree
9468        // index, which caused `ON CONFLICT DO NOTHING` to dedup
9469        // by leading column alone (3 rows with same group_id but
9470        // different permission collapsed to 1). PG semantics use
9471        // the full tuple. Prefer a UniquenessConstraint's full
9472        // column list when one exists; fall back to the leading
9473        // BTree column for legacy single-column UNIQUE.
9474        if let Some(uc) = table.schema().uniqueness_constraints.first() {
9475            return Ok(uc.columns.clone());
9476        }
9477        let pos = table
9478            .indices()
9479            .iter()
9480            .find_map(|idx| {
9481                if matches!(idx.kind, spg_storage::IndexKind::BTree(_))
9482                    && idx.partial_predicate.is_none()
9483                    && idx.included_columns.is_empty()
9484                    && idx.expression.is_none()
9485                {
9486                    Some(idx.column_position)
9487                } else {
9488                    None
9489                }
9490            })
9491            .ok_or_else(|| {
9492                EngineError::Unsupported(alloc::format!(
9493                    "ON CONFLICT without target requires a UNIQUE BTree index on {table_name:?}"
9494                ))
9495            })?;
9496        return Ok(alloc::vec![pos]);
9497    }
9498    let mut out = Vec::with_capacity(target.len());
9499    for name in target {
9500        let pos = table
9501            .schema()
9502            .columns
9503            .iter()
9504            .position(|c| c.name == *name)
9505            .ok_or_else(|| {
9506                EngineError::Unsupported(alloc::format!(
9507                    "ON CONFLICT target column {name:?} not found on {table_name:?}"
9508                ))
9509            })?;
9510        out.push(pos);
9511    }
9512    Ok(out)
9513}
9514
9515/// v7.9.8 — check whether the BTree index on `column_pos` of
9516/// `table_name` already has a row with this key.
9517fn on_conflict_key_exists(
9518    catalog: &Catalog,
9519    table_name: &str,
9520    column_pos: usize,
9521    key: &Value,
9522) -> bool {
9523    let Some(table) = catalog.get(table_name) else {
9524        return false;
9525    };
9526    let Some(idx_key) = spg_storage::IndexKey::from_value(key) else {
9527        return false;
9528    };
9529    table.indices().iter().any(|idx| {
9530        matches!(idx.kind, spg_storage::IndexKind::BTree(_))
9531            && idx.column_position == column_pos
9532            && idx.partial_predicate.is_none()
9533            && !idx.lookup_eq(&idx_key).is_empty()
9534    })
9535}
9536
9537/// v7.9.9 / v7.9.10 — look up an existing row's position by
9538/// matching all `column_positions` against the incoming `key`
9539/// tuple. Single-column shape (one column) reduces to the
9540/// canonical PK lookup; composite shapes scan linearly until
9541/// every position matches.
9542fn lookup_row_position_by_keys(
9543    catalog: &Catalog,
9544    table_name: &str,
9545    column_positions: &[usize],
9546    key: &[&Value],
9547) -> Option<usize> {
9548    let table = catalog.get(table_name)?;
9549    table.rows().iter().position(|r| {
9550        column_positions
9551            .iter()
9552            .enumerate()
9553            .all(|(i, &pos)| r.values.get(pos) == Some(key[i]))
9554    })
9555}
9556
9557/// v7.9.10 — does the table already contain a row whose
9558/// `column_positions` tuple equals `key`? Single-column shape
9559/// uses the existing BTree fast path; composite shapes fall
9560/// back to a row scan.
9561fn on_conflict_keys_exist(
9562    catalog: &Catalog,
9563    table_name: &str,
9564    column_positions: &[usize],
9565    key: &[&Value],
9566) -> bool {
9567    if column_positions.len() == 1 {
9568        return on_conflict_key_exists(catalog, table_name, column_positions[0], key[0]);
9569    }
9570    let Some(table) = catalog.get(table_name) else {
9571        return false;
9572    };
9573    table.rows().iter().any(|r| {
9574        column_positions
9575            .iter()
9576            .enumerate()
9577            .all(|(i, &pos)| r.values.get(pos) == Some(key[i]))
9578    })
9579}
9580
9581/// v7.9.9 — apply ON CONFLICT DO UPDATE SET assignments to an
9582/// existing row.
9583///
9584/// `incoming` is the rejected INSERT row (used to resolve
9585/// `EXCLUDED.col` references in the assignment exprs);
9586/// `target_pos` is the position of the existing row in the table.
9587/// Each assignment substitutes `EXCLUDED.col` with the matching
9588/// incoming value, evaluates the resulting expression against
9589/// the existing row, and writes the new value into the
9590/// corresponding column of the returned `Vec<Value>`. If
9591/// `where_` evaluates falsy, returns Ok(None) — PG behaviour:
9592/// the conflicting row is silently kept unchanged.
9593fn apply_on_conflict_assignments(
9594    catalog: &Catalog,
9595    table_name: &str,
9596    target_pos: usize,
9597    incoming: &[Value],
9598    assignments: &[(String, Expr)],
9599    where_: Option<&Expr>,
9600) -> Result<Option<Vec<Value>>, EngineError> {
9601    let table = catalog.get(table_name).ok_or_else(|| {
9602        EngineError::Storage(StorageError::TableNotFound {
9603            name: table_name.into(),
9604        })
9605    })?;
9606    let schema_cols = table.schema().columns.clone();
9607    let existing = table
9608        .rows()
9609        .get(target_pos)
9610        .ok_or_else(|| {
9611            EngineError::Unsupported(alloc::format!(
9612                "ON CONFLICT DO UPDATE: row position {target_pos} out of bounds on {table_name:?}"
9613            ))
9614        })?
9615        .clone();
9616    let ctx = eval::EvalContext::new(&schema_cols, Some(table_name));
9617    // Optional WHERE filter on the conflict row.
9618    if let Some(w) = where_ {
9619        let pred = w.clone();
9620        let pred = substitute_excluded_refs(pred, &schema_cols, incoming);
9621        let v = eval::eval_expr(&pred, &existing, &ctx)?;
9622        if !matches!(v, Value::Bool(true)) {
9623            return Ok(None);
9624        }
9625    }
9626    let mut new_values = existing.values.clone();
9627    for (col_name, expr) in assignments {
9628        let target_idx = schema_cols
9629            .iter()
9630            .position(|c| c.name == *col_name)
9631            .ok_or_else(|| {
9632                EngineError::Eval(EvalError::ColumnNotFound {
9633                    name: col_name.clone(),
9634                })
9635            })?;
9636        let sub = substitute_excluded_refs(expr.clone(), &schema_cols, incoming);
9637        let v = eval::eval_expr(&sub, &existing, &ctx)?;
9638        new_values[target_idx] = coerce_value(v, schema_cols[target_idx].ty, col_name, target_idx)?;
9639    }
9640    Ok(Some(new_values))
9641}
9642
9643/// v7.9.9 — walk an `Expr` tree replacing any `Column { qualifier:
9644/// "EXCLUDED", name }` reference with a `Literal` of the matching
9645/// value from the incoming-row vec. Resolution against the
9646/// child-table column list (by name).
9647fn substitute_excluded_refs(expr: Expr, schema_cols: &[ColumnSchema], incoming: &[Value]) -> Expr {
9648    use spg_sql::ast::ColumnName;
9649    match expr {
9650        Expr::Column(ColumnName { qualifier, name })
9651            if qualifier
9652                .as_deref()
9653                .is_some_and(|q| q.eq_ignore_ascii_case("excluded")) =>
9654        {
9655            let pos = schema_cols.iter().position(|c| c.name == name);
9656            match pos {
9657                Some(p) => {
9658                    let v = incoming.get(p).cloned().unwrap_or(Value::Null);
9659                    value_to_literal_expr(v)
9660                        .unwrap_or_else(|_| Expr::Literal(spg_sql::ast::Literal::Null))
9661                }
9662                None => Expr::Column(ColumnName { qualifier, name }),
9663            }
9664        }
9665        Expr::Binary { op, lhs, rhs } => Expr::Binary {
9666            op,
9667            lhs: Box::new(substitute_excluded_refs(*lhs, schema_cols, incoming)),
9668            rhs: Box::new(substitute_excluded_refs(*rhs, schema_cols, incoming)),
9669        },
9670        Expr::Unary { op, expr } => Expr::Unary {
9671            op,
9672            expr: Box::new(substitute_excluded_refs(*expr, schema_cols, incoming)),
9673        },
9674        Expr::FunctionCall { name, args } => Expr::FunctionCall {
9675            name,
9676            args: args
9677                .into_iter()
9678                .map(|a| substitute_excluded_refs(a, schema_cols, incoming))
9679                .collect(),
9680        },
9681        other => other,
9682    }
9683}
9684
9685/// v7.6.2 / v7.6.7 — INSERT-side FK enforcement. For every row
9686/// about to be inserted into `child_table`, every FK declared on
9687/// that table is checked: the row's FK columns must either be
9688/// NULL (SQL spec skip) or match an existing parent row via the
9689/// parent's BTree PK / UNIQUE index.
9690///
9691/// Returns `EngineError::Unsupported` with a `FOREIGN KEY violation`
9692/// payload on first failure.
9693///
9694/// **Self-referencing FKs (v7.6.7 widening):** when `fk.parent_table
9695/// == child_table`, the parent rows visible to this check are
9696///  (a) rows already committed to the table, plus
9697///  (b) earlier rows from the *same* `rows` batch.
9698/// This makes `INSERT INTO tree VALUES (1, NULL), (2, 1), (3, 2)`
9699/// work in a single statement — common pattern for bulk-loading
9700/// hierarchies.
9701/// v7.9.19 — enforce table-level UNIQUE / PRIMARY KEY tuple
9702/// constraints at INSERT time. For each constraint declared on
9703/// the target table, check that no existing row + no earlier row
9704/// in the same batch has the same full-column tuple. NULL in
9705/// any column lifts the row out of the check (SQL spec: NULL
9706/// ≠ NULL for uniqueness). mailrs G1 + G6.
9707fn enforce_uniqueness_inserts(
9708    catalog: &Catalog,
9709    child_table: &str,
9710    constraints: &[spg_storage::UniquenessConstraint],
9711    rows: &[Vec<Value>],
9712) -> Result<(), EngineError> {
9713    if constraints.is_empty() {
9714        return Ok(());
9715    }
9716    let table = catalog.get(child_table).ok_or_else(|| {
9717        EngineError::Storage(StorageError::TableNotFound {
9718            name: child_table.into(),
9719        })
9720    })?;
9721    for uc in constraints {
9722        for (batch_idx, row_values) in rows.iter().enumerate() {
9723            let key: Vec<&Value> = uc.columns.iter().map(|&i| &row_values[i]).collect();
9724            let has_null = key.iter().any(|v| matches!(v, Value::Null));
9725            // v7.13.0 — `NULLS NOT DISTINCT` (mailrs round-5 G10,
9726            // PG 15+): two rows whose constrained columns are all
9727            // NULL collide. SQL-standard `NULLS DISTINCT` lets any
9728            // NULL skip the check.
9729            if has_null && !uc.nulls_not_distinct {
9730                continue;
9731            }
9732            // Table-side collision: scan existing rows.
9733            let collides_in_table = table.rows().iter().any(|prow| {
9734                uc.columns
9735                    .iter()
9736                    .enumerate()
9737                    .all(|(i, &p)| prow.values.get(p) == Some(key[i]))
9738            });
9739            // Batch-side collision: earlier rows in the same INSERT.
9740            let collides_in_batch = rows[..batch_idx].iter().any(|earlier| {
9741                uc.columns
9742                    .iter()
9743                    .enumerate()
9744                    .all(|(i, &p)| earlier.get(p) == Some(key[i]))
9745            });
9746            if collides_in_table || collides_in_batch {
9747                let kind = if uc.is_primary_key {
9748                    "PRIMARY KEY"
9749                } else {
9750                    "UNIQUE"
9751                };
9752                let col_names: Vec<String> = uc
9753                    .columns
9754                    .iter()
9755                    .map(|&i| table.schema().columns[i].name.clone())
9756                    .collect();
9757                return Err(EngineError::Unsupported(alloc::format!(
9758                    "{kind} violation on {child_table:?} columns {col_names:?}: \
9759                     row #{batch_idx} duplicates an existing key"
9760                )));
9761            }
9762        }
9763    }
9764    Ok(())
9765}
9766
9767/// v7.9.29 — `true` iff `v` counts as a truthy SQL value for a
9768/// WHERE-style predicate. NULL → false (three-valued logic
9769/// collapses to "skip this row" for index inclusion). Numeric
9770/// non-zero, BIGINT non-zero, TINYINT non-zero, BOOLEAN true → true.
9771/// Everything else (strings, vectors, JSON, …) is not a valid
9772/// predicate result and surfaces as `false` so a malformed
9773/// predicate degrades to "row not in index" rather than panicking.
9774fn predicate_truthy(v: &spg_storage::Value) -> bool {
9775    use spg_storage::Value as V;
9776    match v {
9777        V::Bool(b) => *b,
9778        V::Int(n) => *n != 0,
9779        V::BigInt(n) => *n != 0,
9780        V::SmallInt(n) => *n != 0,
9781        _ => false,
9782    }
9783}
9784
9785/// v7.9.29 — at CREATE UNIQUE INDEX time, scan the table's
9786/// committed rows for pre-existing duplicates. If any pair of rows
9787/// matches the predicate AND has the same index key, refuse to
9788/// create the index so the user fixes the data before retrying.
9789fn check_existing_unique_violation(
9790    idx: &spg_storage::Index,
9791    schema: &spg_storage::TableSchema,
9792    rows: &[spg_storage::Row],
9793) -> Result<(), EngineError> {
9794    let predicate_expr = match idx.partial_predicate.as_deref() {
9795        Some(s) => Some(spg_sql::parser::parse_expression(s).map_err(|e| {
9796            EngineError::Unsupported(alloc::format!(
9797                "stored partial predicate {s:?} failed to re-parse: {e:?}"
9798            ))
9799        })?),
9800        None => None,
9801    };
9802    let ctx = eval::EvalContext::new(&schema.columns, None);
9803    let key_positions = unique_key_positions(idx);
9804    let mut seen: alloc::vec::Vec<alloc::vec::Vec<spg_storage::Value>> = alloc::vec::Vec::new();
9805    for row in rows {
9806        if let Some(expr) = &predicate_expr {
9807            let v = eval::eval_expr(expr, row, &ctx).map_err(|e| {
9808                EngineError::Unsupported(alloc::format!(
9809                    "evaluating UNIQUE INDEX predicate against existing row: {e:?}"
9810                ))
9811            })?;
9812            if !predicate_truthy(&v) {
9813                continue;
9814            }
9815        }
9816        let key: alloc::vec::Vec<spg_storage::Value> = key_positions
9817            .iter()
9818            .map(|&p| {
9819                row.values
9820                    .get(p)
9821                    .cloned()
9822                    .unwrap_or(spg_storage::Value::Null)
9823            })
9824            .collect();
9825        if key.iter().any(|v| matches!(v, spg_storage::Value::Null)) {
9826            continue;
9827        }
9828        if seen.iter().any(|other| *other == key) {
9829            return Err(EngineError::Unsupported(alloc::format!(
9830                "CREATE UNIQUE INDEX {:?}: existing rows already violate the constraint",
9831                idx.name
9832            )));
9833        }
9834        seen.push(key);
9835    }
9836    Ok(())
9837}
9838
9839/// v7.9.29 — full key tuple for a UNIQUE INDEX (leading +
9840/// extra positions). For single-column indexes this is just
9841/// `[column_position]`.
9842fn unique_key_positions(idx: &spg_storage::Index) -> alloc::vec::Vec<usize> {
9843    let mut out = alloc::vec::Vec::with_capacity(1 + idx.extra_column_positions.len());
9844    out.push(idx.column_position);
9845    out.extend_from_slice(&idx.extra_column_positions);
9846    out
9847}
9848
9849/// v7.9.29 — at INSERT time, walk every `is_unique` index on the
9850/// target table. For each, eval the index's optional predicate
9851/// against (a) the candidate row and (b) every committed row plus
9852/// earlier batch rows; only rows where the predicate is truthy
9853/// participate. A duplicate key among predicate-matching rows is a
9854/// uniqueness violation. NULL keys lift the row out of the check
9855/// (matching PG's "UNIQUE allows multiple NULLs" semantics).
9856fn enforce_unique_index_inserts(
9857    catalog: &Catalog,
9858    table_name: &str,
9859    rows: &[alloc::vec::Vec<spg_storage::Value>],
9860) -> Result<(), EngineError> {
9861    let table = catalog.get(table_name).ok_or_else(|| {
9862        EngineError::Storage(StorageError::TableNotFound {
9863            name: table_name.into(),
9864        })
9865    })?;
9866    let schema = table.schema();
9867    let ctx = eval::EvalContext::new(&schema.columns, None);
9868    for idx in table.indices() {
9869        if !idx.is_unique {
9870            continue;
9871        }
9872        // Re-parse the predicate once per index per batch.
9873        let predicate_expr = match idx.partial_predicate.as_deref() {
9874            Some(s) => Some(spg_sql::parser::parse_expression(s).map_err(|e| {
9875                EngineError::Unsupported(alloc::format!(
9876                    "UNIQUE INDEX {:?} predicate {s:?} failed to re-parse: {e:?}",
9877                    idx.name
9878                ))
9879            })?),
9880            None => None,
9881        };
9882        let key_positions = unique_key_positions(idx);
9883        let key_of = |values: &[spg_storage::Value]| -> alloc::vec::Vec<spg_storage::Value> {
9884            key_positions
9885                .iter()
9886                .map(|&p| values.get(p).cloned().unwrap_or(spg_storage::Value::Null))
9887                .collect()
9888        };
9889        // Helper: does `values` participate in this index? (predicate
9890        // truthy when present.) Wraps `values` into a transient Row
9891        // because eval_expr requires &Row.
9892        let participates = |values: &[spg_storage::Value]| -> Result<bool, EngineError> {
9893            let Some(expr) = &predicate_expr else {
9894                return Ok(true);
9895            };
9896            let tmp_row = spg_storage::Row {
9897                values: values.to_vec(),
9898            };
9899            let v = eval::eval_expr(expr, &tmp_row, &ctx).map_err(|e| {
9900                EngineError::Unsupported(alloc::format!(
9901                    "UNIQUE INDEX {:?} predicate eval: {e:?}",
9902                    idx.name
9903                ))
9904            })?;
9905            Ok(predicate_truthy(&v))
9906        };
9907        for (batch_idx, row_values) in rows.iter().enumerate() {
9908            if !participates(row_values)? {
9909                continue;
9910            }
9911            let key = key_of(row_values);
9912            if key.iter().any(|v| matches!(v, spg_storage::Value::Null)) {
9913                continue;
9914            }
9915            // Committed-table collision.
9916            for prow in table.rows() {
9917                if !participates(&prow.values)? {
9918                    continue;
9919                }
9920                if key_of(&prow.values) == key {
9921                    return Err(EngineError::Unsupported(alloc::format!(
9922                        "UNIQUE INDEX {:?} violation on {table_name:?}: \
9923                         row #{batch_idx} duplicates an existing key",
9924                        idx.name
9925                    )));
9926                }
9927            }
9928            // Within-batch collision: earlier rows in the same INSERT.
9929            for earlier in &rows[..batch_idx] {
9930                if !participates(earlier)? {
9931                    continue;
9932                }
9933                if key_of(earlier) == key {
9934                    return Err(EngineError::Unsupported(alloc::format!(
9935                        "UNIQUE INDEX {:?} violation on {table_name:?}: \
9936                         row #{batch_idx} duplicates an earlier row in the same batch",
9937                        idx.name
9938                    )));
9939                }
9940            }
9941        }
9942    }
9943    Ok(())
9944}
9945
9946/// v7.13.0 — `UPDATE OF cols` filter helper (mailrs round-5 G7).
9947/// Returns `true` when at least one of `filter_cols` has a
9948/// different value in `new_row` vs `old_row`. Column lookup is
9949/// case-insensitive against `schema_cols`; unknown filter columns
9950/// are treated as "not changed" (the trigger therefore won't
9951/// fire on them — surfacing a parse-time error would be too
9952/// strict for catalog reloads where the schema may have drifted).
9953fn any_column_changed(
9954    filter_cols: &[String],
9955    schema_cols: &[ColumnSchema],
9956    old_row: &Row,
9957    new_row: &Row,
9958) -> bool {
9959    for col_name in filter_cols {
9960        let Some(pos) = schema_cols
9961            .iter()
9962            .position(|c| c.name.eq_ignore_ascii_case(col_name))
9963        else {
9964            continue;
9965        };
9966        let old_v = old_row.values.get(pos);
9967        let new_v = new_row.values.get(pos);
9968        if old_v != new_v {
9969            return true;
9970        }
9971    }
9972    false
9973}
9974
9975/// v7.13.0 — evaluate every CHECK predicate on the schema against
9976/// each candidate row. Mirrors PG semantics: a `false` result
9977/// rejects the mutation; a NULL result *passes* (CHECK rejects
9978/// only on definite-false, not on unknown). mailrs round-5 G3.
9979fn enforce_check_constraints(
9980    catalog: &Catalog,
9981    table_name: &str,
9982    rows: &[alloc::vec::Vec<spg_storage::Value>],
9983) -> Result<(), EngineError> {
9984    let table = catalog.get(table_name).ok_or_else(|| {
9985        EngineError::Storage(StorageError::TableNotFound {
9986            name: table_name.into(),
9987        })
9988    })?;
9989    let schema = table.schema();
9990    if schema.checks.is_empty() {
9991        return Ok(());
9992    }
9993    let ctx = eval::EvalContext::new(&schema.columns, None);
9994    let mut parsed: alloc::vec::Vec<(usize, Expr)> = alloc::vec::Vec::new();
9995    for (i, src) in schema.checks.iter().enumerate() {
9996        let expr = spg_sql::parser::parse_expression(src).map_err(|e| {
9997            EngineError::Unsupported(alloc::format!(
9998                "CHECK constraint #{i} on {table_name:?} ({src:?}) failed to re-parse: {e:?}"
9999            ))
10000        })?;
10001        parsed.push((i, expr));
10002    }
10003    for (batch_idx, row_values) in rows.iter().enumerate() {
10004        let tmp_row = spg_storage::Row {
10005            values: row_values.clone(),
10006        };
10007        for (i, expr) in &parsed {
10008            let v = eval::eval_expr(expr, &tmp_row, &ctx).map_err(|e| {
10009                EngineError::Unsupported(alloc::format!(
10010                    "CHECK constraint #{i} on {table_name:?} eval at row #{batch_idx}: {e:?}"
10011                ))
10012            })?;
10013            // PG: NULL passes (CHECK rejects on definite-false only).
10014            if matches!(v, spg_storage::Value::Bool(false)) {
10015                return Err(EngineError::Unsupported(alloc::format!(
10016                    "CHECK constraint violation on {table_name:?} (row #{batch_idx}): {:?}",
10017                    schema.checks[*i]
10018                )));
10019            }
10020        }
10021    }
10022    Ok(())
10023}
10024
10025fn enforce_fk_inserts(
10026    catalog: &Catalog,
10027    child_table: &str,
10028    fks: &[spg_storage::ForeignKeyConstraint],
10029    rows: &[Vec<Value>],
10030) -> Result<(), EngineError> {
10031    for fk in fks {
10032        let parent_is_self = fk.parent_table == child_table;
10033        let parent = if parent_is_self {
10034            // Self-ref: read the current state of the same table.
10035            // The mut borrow on child has been dropped by the caller.
10036            catalog.get(child_table).ok_or_else(|| {
10037                EngineError::Storage(StorageError::TableNotFound {
10038                    name: child_table.into(),
10039                })
10040            })?
10041        } else {
10042            catalog.get(&fk.parent_table).ok_or_else(|| {
10043                EngineError::Storage(StorageError::TableNotFound {
10044                    name: fk.parent_table.clone(),
10045                })
10046            })?
10047        };
10048        for (batch_idx, row_values) in rows.iter().enumerate() {
10049            // Single-column FK fast path: try the parent's BTree
10050            // index for an O(log n) lookup. Composite FKs fall back
10051            // to a parent-row scan.
10052            if fk.local_columns.len() == 1 {
10053                let v = &row_values[fk.local_columns[0]];
10054                if matches!(v, Value::Null) {
10055                    continue;
10056                }
10057                let parent_col = fk.parent_columns[0];
10058                let key = spg_storage::IndexKey::from_value(v).ok_or_else(|| {
10059                    EngineError::Unsupported(alloc::format!(
10060                        "FOREIGN KEY column value of type {:?} is not index-eligible",
10061                        v.data_type()
10062                    ))
10063                })?;
10064                let present_committed = parent.indices().iter().any(|idx| {
10065                    matches!(idx.kind, spg_storage::IndexKind::BTree(_))
10066                        && idx.column_position == parent_col
10067                        && idx.partial_predicate.is_none()
10068                        && !idx.lookup_eq(&key).is_empty()
10069                });
10070                // v7.6.7 self-ref widening: also accept a match
10071                // against earlier rows in this same batch when the
10072                // FK points at the table being inserted into.
10073                let present_in_batch = parent_is_self
10074                    && rows[..batch_idx]
10075                        .iter()
10076                        .any(|earlier| earlier.get(parent_col) == Some(v));
10077                if !(present_committed || present_in_batch) {
10078                    return Err(EngineError::Unsupported(alloc::format!(
10079                        "FOREIGN KEY violation: no parent row in {:?} where {} = {:?}",
10080                        fk.parent_table,
10081                        parent
10082                            .schema()
10083                            .columns
10084                            .get(parent_col)
10085                            .map_or("?", |c| c.name.as_str()),
10086                        v,
10087                    )));
10088                }
10089            } else {
10090                // Composite FK: scan parent rows. v7.6.7 also
10091                // accepts a match against earlier rows in the same
10092                // batch (self-ref bulk-loading of hierarchies).
10093                if fk
10094                    .local_columns
10095                    .iter()
10096                    .all(|&i| matches!(row_values.get(i), Some(Value::Null)))
10097                {
10098                    continue;
10099                }
10100                let local: Vec<&Value> = fk.local_columns.iter().map(|&i| &row_values[i]).collect();
10101                let parent_match_committed = parent.rows().iter().any(|prow| {
10102                    fk.parent_columns
10103                        .iter()
10104                        .enumerate()
10105                        .all(|(i, &pi)| prow.values.get(pi) == Some(local[i]))
10106                });
10107                let parent_match_in_batch = parent_is_self
10108                    && rows[..batch_idx].iter().any(|earlier| {
10109                        fk.parent_columns
10110                            .iter()
10111                            .enumerate()
10112                            .all(|(i, &pi)| earlier.get(pi) == Some(local[i]))
10113                    });
10114                if !(parent_match_committed || parent_match_in_batch) {
10115                    return Err(EngineError::Unsupported(alloc::format!(
10116                        "FOREIGN KEY violation: no parent row in {:?} matching composite key",
10117                        fk.parent_table,
10118                    )));
10119                }
10120            }
10121        }
10122    }
10123    Ok(())
10124}
10125
10126/// v7.6.4 / v7.6.5 — one step of the FK action plan computed for a
10127/// DELETE on a parent. The plan is a list of these steps, stacked
10128/// across the FK graph by `plan_fk_parent_deletions`.
10129#[derive(Debug, Clone)]
10130struct FkChildStep {
10131    child_table: String,
10132    action: FkChildAction,
10133}
10134
10135#[derive(Debug, Clone)]
10136enum FkChildAction {
10137    /// CASCADE — remove these rows. Sorted, deduplicated positions.
10138    Delete { positions: Vec<usize> },
10139    /// SET NULL — for each (row, column) in the flat list, write
10140    /// NULL into that child cell. Multiple FKs on the same row may
10141    /// produce overlapping entries (deduped at plan time).
10142    SetNull {
10143        positions: Vec<usize>,
10144        columns: Vec<usize>,
10145    },
10146    /// SET DEFAULT — same shape as SetNull but writes the column's
10147    /// declared DEFAULT value (resolved at plan time). Columns
10148    /// without a DEFAULT raise an error during planning.
10149    SetDefault {
10150        positions: Vec<usize>,
10151        columns: Vec<usize>,
10152        defaults: Vec<Value>,
10153    },
10154}
10155
10156/// v7.6.3 → v7.6.5 — plan FK fallout for a DELETE on a parent table.
10157///
10158/// Walks every table in the catalog looking for FKs whose
10159/// `parent_table` is `parent_table_name`. For each such FK + each
10160/// to-be-deleted parent row:
10161///
10162///   - RESTRICT / NoAction → error, no plan returned
10163///   - CASCADE → child rows get scheduled for deletion; recursive
10164///   - SetNull → child FK column(s) scheduled to be NULL-ed.
10165///     Verified NULL-able at plan time.
10166///   - SetDefault → child FK column(s) scheduled to be reset to
10167///     their declared DEFAULT. Columns without a DEFAULT raise.
10168///
10169/// SET NULL / SET DEFAULT do NOT cascade further — the child row
10170/// stays; only one of its columns mutates.
10171fn plan_fk_parent_deletions(
10172    catalog: &Catalog,
10173    parent_table_name: &str,
10174    to_delete_positions: &[usize],
10175    to_delete_rows: &[Vec<Value>],
10176) -> Result<Vec<FkChildStep>, EngineError> {
10177    use alloc::collections::{BTreeMap, BTreeSet};
10178    if to_delete_rows.is_empty() {
10179        return Ok(Vec::new());
10180    }
10181    let mut delete_plan: BTreeMap<String, BTreeSet<usize>> = BTreeMap::new();
10182    // setnull / setdefault keyed by child_table → (row_idx, col_idx) → optional default
10183    let mut setnull_plan: BTreeMap<String, BTreeSet<(usize, usize)>> = BTreeMap::new();
10184    let mut setdefault_plan: BTreeMap<String, BTreeMap<(usize, usize), Value>> = BTreeMap::new();
10185    let mut visited: BTreeSet<(String, usize)> = BTreeSet::new();
10186    for &p in to_delete_positions {
10187        visited.insert((parent_table_name.to_string(), p));
10188    }
10189    let mut work: Vec<(String, Vec<Value>)> = to_delete_rows
10190        .iter()
10191        .map(|r| (parent_table_name.to_string(), r.clone()))
10192        .collect();
10193    while let Some((cur_parent, parent_row)) = work.pop() {
10194        for child_name in catalog.table_names() {
10195            let child = catalog
10196                .get(&child_name)
10197                .expect("table_names → catalog.get round-trip is total");
10198            for fk in &child.schema().foreign_keys {
10199                if fk.parent_table != cur_parent {
10200                    continue;
10201                }
10202                let parent_key: Vec<&Value> = fk
10203                    .parent_columns
10204                    .iter()
10205                    .map(|&pi| &parent_row[pi])
10206                    .collect();
10207                if parent_key.iter().any(|v| matches!(v, Value::Null)) {
10208                    continue;
10209                }
10210                for (child_row_idx, child_row) in child.rows().iter().enumerate() {
10211                    if child_name == cur_parent
10212                        && visited.contains(&(child_name.clone(), child_row_idx))
10213                    {
10214                        continue;
10215                    }
10216                    let matches_key = fk
10217                        .local_columns
10218                        .iter()
10219                        .enumerate()
10220                        .all(|(i, &li)| child_row.values.get(li) == Some(parent_key[i]));
10221                    if !matches_key {
10222                        continue;
10223                    }
10224                    match fk.on_delete {
10225                        spg_storage::FkAction::Restrict | spg_storage::FkAction::NoAction => {
10226                            return Err(EngineError::Unsupported(alloc::format!(
10227                                "FOREIGN KEY violation: DELETE on {cur_parent:?} is \
10228                                 restricted by FK from {child_name:?}.{:?}",
10229                                fk.local_columns,
10230                            )));
10231                        }
10232                        spg_storage::FkAction::Cascade => {
10233                            if visited.insert((child_name.clone(), child_row_idx)) {
10234                                delete_plan
10235                                    .entry(child_name.clone())
10236                                    .or_default()
10237                                    .insert(child_row_idx);
10238                                work.push((child_name.clone(), child_row.values.clone()));
10239                            }
10240                        }
10241                        spg_storage::FkAction::SetNull => {
10242                            // Verify every local FK column is NULL-able.
10243                            for &li in &fk.local_columns {
10244                                let col = child.schema().columns.get(li).ok_or_else(|| {
10245                                    EngineError::Unsupported(alloc::format!(
10246                                        "FK local column {li} missing in {child_name:?}"
10247                                    ))
10248                                })?;
10249                                if !col.nullable {
10250                                    return Err(EngineError::Unsupported(alloc::format!(
10251                                        "FOREIGN KEY ON DELETE SET NULL: column \
10252                                         {child_name:?}.{:?} is NOT NULL — cannot SET NULL",
10253                                        col.name,
10254                                    )));
10255                                }
10256                            }
10257                            let entry = setnull_plan.entry(child_name.clone()).or_default();
10258                            for &li in &fk.local_columns {
10259                                entry.insert((child_row_idx, li));
10260                            }
10261                        }
10262                        spg_storage::FkAction::SetDefault => {
10263                            // Resolve the DEFAULT for every local FK col.
10264                            let entry = setdefault_plan.entry(child_name.clone()).or_default();
10265                            for &li in &fk.local_columns {
10266                                let col = child.schema().columns.get(li).ok_or_else(|| {
10267                                    EngineError::Unsupported(alloc::format!(
10268                                        "FK local column {li} missing in {child_name:?}"
10269                                    ))
10270                                })?;
10271                                let default = col.default.clone().ok_or_else(|| {
10272                                    EngineError::Unsupported(alloc::format!(
10273                                        "FOREIGN KEY ON DELETE SET DEFAULT: column \
10274                                         {child_name:?}.{:?} has no DEFAULT declared",
10275                                        col.name,
10276                                    ))
10277                                })?;
10278                                entry.insert((child_row_idx, li), default);
10279                            }
10280                        }
10281                    }
10282                }
10283            }
10284        }
10285    }
10286    // Flatten the three plans into the ordered `FkChildStep` list.
10287    // Deletes are applied last per child (after any null/default
10288    // re-writes on the same child) so a child row that's both
10289    // re-written and then cascade-deleted only ends up deleted —
10290    // but in v7.6.5 SetNull/Cascade never overlap on the same row
10291    // (a single FK chooses exactly one action), so the order is
10292    // mostly a precaution.
10293    let mut steps: Vec<FkChildStep> = Vec::new();
10294    for (child_table, entries) in setnull_plan {
10295        let (positions, columns): (Vec<usize>, Vec<usize>) = entries.into_iter().unzip();
10296        steps.push(FkChildStep {
10297            child_table,
10298            action: FkChildAction::SetNull { positions, columns },
10299        });
10300    }
10301    for (child_table, entries) in setdefault_plan {
10302        let mut positions = Vec::with_capacity(entries.len());
10303        let mut columns = Vec::with_capacity(entries.len());
10304        let mut defaults = Vec::with_capacity(entries.len());
10305        for ((p, c), v) in entries {
10306            positions.push(p);
10307            columns.push(c);
10308            defaults.push(v);
10309        }
10310        steps.push(FkChildStep {
10311            child_table,
10312            action: FkChildAction::SetDefault {
10313                positions,
10314                columns,
10315                defaults,
10316            },
10317        });
10318    }
10319    for (child_table, positions) in delete_plan {
10320        steps.push(FkChildStep {
10321            child_table,
10322            action: FkChildAction::Delete {
10323                positions: positions.into_iter().collect(),
10324            },
10325        });
10326    }
10327    Ok(steps)
10328}
10329
10330/// v7.6.6 — plan FK fallout for an UPDATE that mutates parent-side
10331/// PK/UNIQUE columns. Walks every other table whose FK references
10332/// `parent_table_name`; for each FK whose parent_columns overlap a
10333/// mutated column, decides the action by `fk.on_update`.
10334///
10335///   - RESTRICT / NoAction → error if any child references the OLD
10336///     value
10337///   - CASCADE → child FK columns get rewritten to the NEW parent
10338///     value (a SetNull-style update step with the new value)
10339///   - SetNull → child FK columns set to NULL
10340///   - SetDefault → child FK columns set to declared default
10341///
10342/// `plan_with_old` is `(row_position, old_values, new_values)` so
10343/// the planner can detect "did this row's parent key actually
10344/// change?" — only rows where at least one referenced parent
10345/// column moved trigger inbound work.
10346fn plan_fk_parent_updates(
10347    catalog: &Catalog,
10348    parent_table_name: &str,
10349    plan_with_old: &[(usize, Vec<Value>, Vec<Value>)],
10350) -> Result<Vec<FkChildStep>, EngineError> {
10351    use alloc::collections::BTreeMap;
10352    if plan_with_old.is_empty() {
10353        return Ok(Vec::new());
10354    }
10355    // For each child table we may touch, build per-child step
10356    // lists. UPDATE never deletes children — `delete_plan` stays
10357    // empty here but is kept structurally aligned with
10358    // `plan_fk_parent_deletions` for future use.
10359    let delete_plan: BTreeMap<String, alloc::collections::BTreeSet<usize>> = BTreeMap::new();
10360    let mut setnull_plan: BTreeMap<String, alloc::collections::BTreeSet<(usize, usize)>> =
10361        BTreeMap::new();
10362    let mut setdefault_plan: BTreeMap<String, BTreeMap<(usize, usize), Value>> = BTreeMap::new();
10363    // Cascade-update plan: child_table → row_idx → col_idx → new_value
10364    let mut cascade_plan: BTreeMap<String, BTreeMap<(usize, usize), Value>> = BTreeMap::new();
10365
10366    for child_name in catalog.table_names() {
10367        let child = catalog
10368            .get(&child_name)
10369            .expect("table_names → catalog.get total");
10370        for fk in &child.schema().foreign_keys {
10371            if fk.parent_table != parent_table_name {
10372                continue;
10373            }
10374            for (_pos, old_row, new_row) in plan_with_old {
10375                // Did any parent FK column change?
10376                let key_changed = fk
10377                    .parent_columns
10378                    .iter()
10379                    .any(|&pi| old_row.get(pi) != new_row.get(pi));
10380                if !key_changed {
10381                    continue;
10382                }
10383                // The OLD parent key — used to find referring children.
10384                let old_key: Vec<&Value> =
10385                    fk.parent_columns.iter().map(|&pi| &old_row[pi]).collect();
10386                if old_key.iter().any(|v| matches!(v, Value::Null)) {
10387                    // NULL parent has no children — skip.
10388                    continue;
10389                }
10390                let new_key: Vec<&Value> =
10391                    fk.parent_columns.iter().map(|&pi| &new_row[pi]).collect();
10392                for (child_row_idx, child_row) in child.rows().iter().enumerate() {
10393                    // Self-ref same-row updates: a row updating its
10394                    // own PK doesn't restrict itself.
10395                    if child_name == parent_table_name
10396                        && plan_with_old.iter().any(|(p, _, _)| *p == child_row_idx)
10397                    {
10398                        continue;
10399                    }
10400                    let matches_key = fk
10401                        .local_columns
10402                        .iter()
10403                        .enumerate()
10404                        .all(|(i, &li)| child_row.values.get(li) == Some(old_key[i]));
10405                    if !matches_key {
10406                        continue;
10407                    }
10408                    match fk.on_update {
10409                        spg_storage::FkAction::Restrict | spg_storage::FkAction::NoAction => {
10410                            return Err(EngineError::Unsupported(alloc::format!(
10411                                "FOREIGN KEY violation: UPDATE on {parent_table_name:?} PK is \
10412                                 restricted by FK from {child_name:?}.{:?}",
10413                                fk.local_columns,
10414                            )));
10415                        }
10416                        spg_storage::FkAction::Cascade => {
10417                            // Rewrite child FK columns to new key.
10418                            let entry = cascade_plan.entry(child_name.clone()).or_default();
10419                            for (i, &li) in fk.local_columns.iter().enumerate() {
10420                                entry.insert((child_row_idx, li), new_key[i].clone());
10421                            }
10422                        }
10423                        spg_storage::FkAction::SetNull => {
10424                            for &li in &fk.local_columns {
10425                                let col = child.schema().columns.get(li).ok_or_else(|| {
10426                                    EngineError::Unsupported(alloc::format!(
10427                                        "FK local column {li} missing in {child_name:?}"
10428                                    ))
10429                                })?;
10430                                if !col.nullable {
10431                                    return Err(EngineError::Unsupported(alloc::format!(
10432                                        "FOREIGN KEY ON UPDATE SET NULL: column \
10433                                         {child_name:?}.{:?} is NOT NULL",
10434                                        col.name,
10435                                    )));
10436                                }
10437                            }
10438                            let entry = setnull_plan.entry(child_name.clone()).or_default();
10439                            for &li in &fk.local_columns {
10440                                entry.insert((child_row_idx, li));
10441                            }
10442                        }
10443                        spg_storage::FkAction::SetDefault => {
10444                            let entry = setdefault_plan.entry(child_name.clone()).or_default();
10445                            for &li in &fk.local_columns {
10446                                let col = child.schema().columns.get(li).ok_or_else(|| {
10447                                    EngineError::Unsupported(alloc::format!(
10448                                        "FK local column {li} missing in {child_name:?}"
10449                                    ))
10450                                })?;
10451                                let default = col.default.clone().ok_or_else(|| {
10452                                    EngineError::Unsupported(alloc::format!(
10453                                        "FOREIGN KEY ON UPDATE SET DEFAULT: column \
10454                                         {child_name:?}.{:?} has no DEFAULT",
10455                                        col.name,
10456                                    ))
10457                                })?;
10458                                entry.insert((child_row_idx, li), default);
10459                            }
10460                        }
10461                    }
10462                }
10463            }
10464        }
10465    }
10466    // Flatten into FkChildStep list. UPDATE doesn't produce
10467    // DeleteSteps (CASCADE on UPDATE just rewrites FK values).
10468    let mut steps: Vec<FkChildStep> = Vec::new();
10469    for (child_table, entries) in cascade_plan {
10470        let mut positions = Vec::with_capacity(entries.len());
10471        let mut columns = Vec::with_capacity(entries.len());
10472        let mut defaults = Vec::with_capacity(entries.len());
10473        for ((p, c), v) in entries {
10474            positions.push(p);
10475            columns.push(c);
10476            defaults.push(v);
10477        }
10478        // We reuse `FkChildAction::SetDefault` for cascade-update:
10479        // both shapes are "write a known value into specific cells"
10480        // — `apply_per_cell_writes` doesn't care whether the value
10481        // came from a DEFAULT declaration or a new parent key.
10482        steps.push(FkChildStep {
10483            child_table,
10484            action: FkChildAction::SetDefault {
10485                positions,
10486                columns,
10487                defaults,
10488            },
10489        });
10490    }
10491    for (child_table, entries) in setnull_plan {
10492        let (positions, columns): (Vec<usize>, Vec<usize>) = entries.into_iter().unzip();
10493        steps.push(FkChildStep {
10494            child_table,
10495            action: FkChildAction::SetNull { positions, columns },
10496        });
10497    }
10498    for (child_table, entries) in setdefault_plan {
10499        let mut positions = Vec::with_capacity(entries.len());
10500        let mut columns = Vec::with_capacity(entries.len());
10501        let mut defaults = Vec::with_capacity(entries.len());
10502        for ((p, c), v) in entries {
10503            positions.push(p);
10504            columns.push(c);
10505            defaults.push(v);
10506        }
10507        steps.push(FkChildStep {
10508            child_table,
10509            action: FkChildAction::SetDefault {
10510                positions,
10511                columns,
10512                defaults,
10513            },
10514        });
10515    }
10516    let _ = delete_plan; // UPDATE never deletes children.
10517    Ok(steps)
10518}
10519
10520/// v7.6.5 — apply one FK child step to the catalog. Encapsulates
10521/// the three action variants so the DELETE executor stays a
10522/// simple loop over the planned steps.
10523fn apply_fk_child_step(catalog: &mut Catalog, step: &FkChildStep) -> Result<(), EngineError> {
10524    let child = catalog.get_mut(&step.child_table).ok_or_else(|| {
10525        EngineError::Storage(StorageError::TableNotFound {
10526            name: step.child_table.clone(),
10527        })
10528    })?;
10529    match &step.action {
10530        FkChildAction::Delete { positions } => {
10531            let _ = child.delete_rows(positions);
10532        }
10533        FkChildAction::SetNull { positions, columns } => {
10534            apply_per_cell_writes(child, positions, columns, |_| Value::Null)?;
10535        }
10536        FkChildAction::SetDefault {
10537            positions,
10538            columns,
10539            defaults,
10540        } => {
10541            apply_per_cell_writes(child, positions, columns, |i| defaults[i].clone())?;
10542        }
10543    }
10544    Ok(())
10545}
10546
10547/// v7.6.5 — write new values into selected child cells via
10548/// `Table::update_row` (the catalog's existing UPDATE entry).
10549/// Groups writes by row position so multi-column updates on the
10550/// same row only call `update_row` once. `value_for(i)` produces
10551/// the new value for the i-th (position, column) entry.
10552fn apply_per_cell_writes(
10553    child: &mut spg_storage::Table,
10554    positions: &[usize],
10555    columns: &[usize],
10556    mut value_for: impl FnMut(usize) -> Value,
10557) -> Result<(), EngineError> {
10558    use alloc::collections::BTreeMap;
10559    let mut by_row: BTreeMap<usize, Vec<(usize, Value)>> = BTreeMap::new();
10560    for i in 0..positions.len() {
10561        by_row
10562            .entry(positions[i])
10563            .or_default()
10564            .push((columns[i], value_for(i)));
10565    }
10566    for (pos, mutations) in by_row {
10567        let mut new_values = child.rows()[pos].values.clone();
10568        for (col, v) in mutations {
10569            if let Some(slot) = new_values.get_mut(col) {
10570                *slot = v;
10571            }
10572        }
10573        child
10574            .update_row(pos, new_values)
10575            .map_err(EngineError::Storage)?;
10576    }
10577    Ok(())
10578}
10579
10580fn fk_action_sql_to_storage(a: spg_sql::ast::FkAction) -> spg_storage::FkAction {
10581    match a {
10582        spg_sql::ast::FkAction::Restrict => spg_storage::FkAction::Restrict,
10583        spg_sql::ast::FkAction::Cascade => spg_storage::FkAction::Cascade,
10584        spg_sql::ast::FkAction::SetNull => spg_storage::FkAction::SetNull,
10585        spg_sql::ast::FkAction::SetDefault => spg_storage::FkAction::SetDefault,
10586        spg_sql::ast::FkAction::NoAction => spg_storage::FkAction::NoAction,
10587    }
10588}
10589
10590/// v7.9.21 — resolve a column's DEFAULT for INSERT-time
10591/// default-fill. Free fn (rather than `&self`) so callers
10592/// with an active `&mut Table` borrow can still use it.
10593/// Literal defaults take the cached path (`col.default`);
10594/// runtime defaults hit `clock_fn` at each call. mailrs G4.
10595fn resolve_column_default_free(
10596    col: &ColumnSchema,
10597    clock_fn: Option<ClockFn>,
10598) -> Result<Value, EngineError> {
10599    if let Some(rt) = &col.runtime_default {
10600        return eval_runtime_default_free(rt, col.ty, clock_fn);
10601    }
10602    Ok(col.default.clone().unwrap_or(Value::Null))
10603}
10604
10605fn eval_runtime_default_free(
10606    rt: &str,
10607    ty: DataType,
10608    clock_fn: Option<ClockFn>,
10609) -> Result<Value, EngineError> {
10610    let s = rt.trim().to_ascii_lowercase();
10611    let canonical = s.trim_end_matches("()");
10612    let now_us = match clock_fn {
10613        Some(f) => f(),
10614        None => 0,
10615    };
10616    let v = match canonical {
10617        "now" | "current_timestamp" | "localtimestamp" => Value::Timestamp(now_us),
10618        "current_date" => Value::Date((now_us / 86_400_000_000) as i32),
10619        "current_time" | "localtime" => Value::Timestamp(now_us),
10620        other => {
10621            return Err(EngineError::Unsupported(alloc::format!(
10622                "runtime DEFAULT expression {other:?} not supported \
10623                 (v7.9.21 whitelist: now() / current_timestamp / \
10624                 current_date / current_time / localtimestamp / \
10625                 localtime)"
10626            )));
10627        }
10628    };
10629    coerce_value(v, ty, "DEFAULT", 0)
10630}
10631
10632/// v7.9.21 — true when a DEFAULT expression needs INSERT-time
10633/// evaluation rather than being cacheable as a literal Value.
10634/// FunctionCall is the immediate case (`now()`,
10635/// `current_timestamp`). Literal expressions and simple sign-
10636/// flipped numerics still take the static-cache path.
10637fn is_runtime_default_expr(expr: &Expr) -> bool {
10638    match expr {
10639        Expr::FunctionCall { .. } => true,
10640        Expr::Unary { expr, .. } => is_runtime_default_expr(expr),
10641        _ => false,
10642    }
10643}
10644
10645fn column_def_to_schema(c: ColumnDef) -> Result<ColumnSchema, EngineError> {
10646    let ty = column_type_to_data_type(c.ty);
10647    let mut schema = ColumnSchema::new(c.name.clone(), ty, c.nullable);
10648    if let Some(default_expr) = c.default {
10649        // v7.9.21 — distinguish literal defaults (evaluated once
10650        // at CREATE TABLE) from expression defaults (deferred to
10651        // INSERT). Function calls (`now()`, `current_timestamp`
10652        // — see v7.9.20 keyword promotion) take the runtime path.
10653        // Literals continue to cache. mailrs G4.
10654        if is_runtime_default_expr(&default_expr) {
10655            let display = alloc::format!("{default_expr}");
10656            schema = schema.with_runtime_default(display);
10657        } else {
10658            let raw = literal_expr_to_value(default_expr)?;
10659            let coerced = coerce_value(raw, ty, &c.name, 0)?;
10660            schema = schema.with_default(coerced);
10661        }
10662    }
10663    if c.auto_increment {
10664        // AUTO_INCREMENT only makes sense on integer-shaped columns.
10665        if !matches!(ty, DataType::SmallInt | DataType::Int | DataType::BigInt) {
10666            return Err(EngineError::Unsupported(alloc::format!(
10667                "AUTO_INCREMENT requires an integer column type, got {ty:?}"
10668            )));
10669        }
10670        schema = schema.with_auto_increment();
10671    }
10672    Ok(schema)
10673}
10674
/// v7.10.4 — decode a BYTEA literal. Accepts:
///   * `\xDEADBEEF` (case-insensitive hex; whitespace around the
///     literal and between digits is ignored)
///   * `Hello\000world` (backslash escape form; `\\` for a literal
///     backslash, `\NNN` for one byte given as three octal digits in
///     the range `\000`..=`\377`)
///   * Anything else → raw UTF-8 bytes of the input (PG accepts this too).
///
/// In the escape / raw form every input byte is significant, including
/// leading and trailing whitespace. A backslash that does not start a
/// valid escape is kept as a literal backslash byte.
///
/// # Errors
/// Only the hex form can fail: `"odd-length hex literal"` or
/// `"invalid hex digit"`.
fn decode_bytea_literal(s: &str) -> Result<alloc::vec::Vec<u8>, &'static str> {
    // Hex detection tolerates surrounding whitespace.
    let trimmed = s.trim();
    if let Some(hex) = trimmed
        .strip_prefix("\\x")
        .or_else(|| trimmed.strip_prefix("\\X"))
    {
        // Hex form. Each pair of hex digits → one byte.
        let cleaned: alloc::string::String = hex.chars().filter(|c| !c.is_whitespace()).collect();
        if cleaned.len() % 2 != 0 {
            return Err("odd-length hex literal");
        }
        let mut out = alloc::vec::Vec::with_capacity(cleaned.len() / 2);
        for pair in cleaned.as_bytes().chunks_exact(2) {
            let hi = hex_nibble(pair[0])?;
            let lo = hex_nibble(pair[1])?;
            out.push((hi << 4) | lo);
        }
        return Ok(out);
    }

    // Escape form or raw. Works on the *untrimmed* input: whitespace
    // at either end is payload here, not padding.
    let bytes = s.as_bytes();
    let mut out = alloc::vec::Vec::with_capacity(bytes.len());
    let mut i = 0;
    while i < bytes.len() {
        let consumed = if bytes[i] == b'\\' {
            match bytes[i + 1..] {
                // `\\` → one backslash.
                [b'\\', ..] => {
                    out.push(b'\\');
                    2
                }
                // `\NNN` with genuine octal digits; a first digit of
                // 0..=3 keeps the value within one byte.
                [d0 @ b'0'..=b'3', d1 @ b'0'..=b'7', d2 @ b'0'..=b'7', ..] => {
                    out.push(((d0 - b'0') << 6) | ((d1 - b'0') << 3) | (d2 - b'0'));
                    4
                }
                // Not an escape we recognise: keep the backslash itself.
                _ => {
                    out.push(b'\\');
                    1
                }
            }
        } else {
            out.push(bytes[i]);
            1
        };
        i += consumed;
    }
    Ok(out)
}

/// Value (0..=15) of a single ASCII hex digit, either case.
///
/// # Errors
/// `"invalid hex digit"` for any other byte.
fn hex_nibble(b: u8) -> Result<u8, &'static str> {
    match b {
        b'0'..=b'9' => Ok(b - b'0'),
        b'a'..=b'f' => Ok(b - b'a' + 10),
        b'A'..=b'F' => Ok(b - b'A' + 10),
        _ => Err("invalid hex digit"),
    }
}
10738
10739/// v7.10.11 — decode a PG TEXT[] external array form
10740/// (`{a,b,NULL}` with optional double-quoted elements). The
10741/// engine takes a leading/trailing `{`/`}` and splits at commas.
10742/// Quoted elements (`"hello, world"`) preserve embedded commas;
10743/// `\\` and `\"` decode to literal backslash / quote. Plain
10744/// unquoted `NULL` (case-insensitive) maps to `None`.
10745/// v7.11.13 — pick the array type for `ARRAY[lit, …]` from the
10746/// element values. Single-element-type rules:
10747///   - all NULL / all Text → TextArray
10748///   - all Int (or Int+NULL) → IntArray
10749///   - any BigInt without Text → BigIntArray (widening)
10750///   - any Text → TextArray (fallback; non-string elements
10751///     render as text)
10752fn array_literal_widen(items: alloc::vec::Vec<Value>) -> Value {
10753    let mut has_text = false;
10754    let mut has_bigint = false;
10755    let mut has_int = false;
10756    for v in &items {
10757        match v {
10758            Value::Null => {}
10759            Value::Text(_) | Value::Json(_) => has_text = true,
10760            Value::BigInt(_) => has_bigint = true,
10761            Value::Int(_) | Value::SmallInt(_) => has_int = true,
10762            _ => has_text = true,
10763        }
10764    }
10765    if has_text || (!has_bigint && !has_int) {
10766        let out: alloc::vec::Vec<Option<alloc::string::String>> = items
10767            .into_iter()
10768            .map(|v| match v {
10769                Value::Null => None,
10770                Value::Text(s) | Value::Json(s) => Some(s),
10771                other => Some(alloc::format!("{other:?}")),
10772            })
10773            .collect();
10774        return Value::TextArray(out);
10775    }
10776    if has_bigint {
10777        let out: alloc::vec::Vec<Option<i64>> = items
10778            .into_iter()
10779            .map(|v| match v {
10780                Value::Null => None,
10781                Value::Int(n) => Some(i64::from(n)),
10782                Value::SmallInt(n) => Some(i64::from(n)),
10783                Value::BigInt(n) => Some(n),
10784                _ => unreachable!("widen: unexpected non-integer in BigInt path"),
10785            })
10786            .collect();
10787        return Value::BigIntArray(out);
10788    }
10789    let out: alloc::vec::Vec<Option<i32>> = items
10790        .into_iter()
10791        .map(|v| match v {
10792            Value::Null => None,
10793            Value::Int(n) => Some(n),
10794            Value::SmallInt(n) => Some(i32::from(n)),
10795            _ => unreachable!("widen: unexpected non-i32-compatible in Int path"),
10796        })
10797        .collect();
10798    Value::IntArray(out)
10799}
10800
/// Decode a PG TEXT[] external array literal such as `{a,b,NULL}`.
///
/// The input is trimmed and must be wrapped in `{` … `}`; the inside
/// is split at commas. Spaces and tabs around elements are ignored.
///
/// * Double-quoted elements (`"hello, world"`) keep embedded commas;
///   inside quotes a backslash makes the following character literal
///   (`\\` → backslash, `\"` → quote).
/// * An unquoted `NULL` (case-insensitive) decodes to `None`; any
///   other unquoted element is taken verbatim after trimming.
/// * A body that is empty or only whitespace yields an empty array.
///
/// # Errors
/// A static message when the braces are missing, a quoted element is
/// never closed, or something other than `,` follows a quoted element.
fn decode_text_array_literal(
    s: &str,
) -> Result<alloc::vec::Vec<Option<alloc::string::String>>, &'static str> {
    let inner = s
        .trim()
        .strip_prefix('{')
        .and_then(|x| x.strip_suffix('}'))
        .ok_or("TEXT[] literal must be enclosed in '{...}'")?;
    let mut out: alloc::vec::Vec<Option<alloc::string::String>> = alloc::vec::Vec::new();
    if inner.trim().is_empty() {
        return Ok(out);
    }
    let bytes = inner.as_bytes();
    let is_blank = |b: u8| b == b' ' || b == b'\t';
    let mut i = 0;
    loop {
        // Skip leading whitespace.
        while i < bytes.len() && is_blank(bytes[i]) {
            i += 1;
        }
        if i < bytes.len() && bytes[i] == b'"' {
            // Quoted element. Collect raw bytes and decode them as
            // UTF-8 afterwards: pushing `byte as char` would turn each
            // byte of a multi-byte character into a separate Latin-1
            // char. Only ASCII bytes (`"` and `\`) are ever dropped,
            // so the collected bytes remain valid UTF-8.
            i += 1; // open quote
            let mut buf: alloc::vec::Vec<u8> = alloc::vec::Vec::new();
            while i < bytes.len() && bytes[i] != b'"' {
                if bytes[i] == b'\\' && i + 1 < bytes.len() {
                    buf.push(bytes[i + 1]);
                    i += 2;
                } else {
                    buf.push(bytes[i]);
                    i += 1;
                }
            }
            if i >= bytes.len() {
                return Err("unterminated quoted element");
            }
            i += 1; // close quote
            let text = alloc::string::String::from_utf8(buf)
                .map_err(|_| "invalid UTF-8 in quoted element")?;
            out.push(Some(text));
        } else {
            // Unquoted element — read until next comma or end. Both
            // ends of the slice sit next to ASCII bytes, so slicing
            // the `str` here cannot split a character.
            let start = i;
            while i < bytes.len() && bytes[i] != b',' {
                i += 1;
            }
            let raw = inner[start..i].trim();
            if raw.eq_ignore_ascii_case("NULL") {
                out.push(None);
            } else {
                out.push(Some(alloc::string::String::from(raw)));
            }
        }
        // Skip whitespace, expect comma or end.
        while i < bytes.len() && is_blank(bytes[i]) {
            i += 1;
        }
        if i >= bytes.len() {
            break;
        }
        if bytes[i] != b',' {
            return Err("expected ',' between TEXT[] elements");
        }
        i += 1;
    }
    Ok(out)
}
10865
/// v7.10.11 — encode a TEXT[] back into the PG external array
/// form (`{a,b,NULL}`).
///
/// `None` elements become the bare token `NULL`. An element is
/// double-quoted, with `"` and `\` escaped by a backslash, when it
///   * is empty,
///   * spells `NULL` (any case) — so it is not read back as a NULL,
///   * contains a comma, brace, double quote or backslash, or
///   * contains any whitespace character. An unquoted element is
///     trimmed when the literal is parsed again, so whitespace of any
///     kind (not only space / tab) has to be protected by quotes.
///
/// Every other element is written verbatim.
fn encode_text_array(items: &[Option<alloc::string::String>]) -> alloc::string::String {
    let mut out = alloc::string::String::with_capacity(2 + items.len() * 8);
    out.push('{');
    for (i, item) in items.iter().enumerate() {
        if i > 0 {
            out.push(',');
        }
        match item {
            None => out.push_str("NULL"),
            Some(s) => {
                let needs_quote = s.is_empty()
                    || s.eq_ignore_ascii_case("NULL")
                    || s.chars().any(|c| {
                        matches!(c, ',' | '{' | '}' | '"' | '\\') || c.is_whitespace()
                    });
                if needs_quote {
                    out.push('"');
                    for c in s.chars() {
                        if c == '"' || c == '\\' {
                            out.push('\\');
                        }
                        out.push(c);
                    }
                    out.push('"');
                } else {
                    out.push_str(s);
                }
            }
        }
    }
    out.push('}');
    out
}
10902
/// v7.10.4 — render BYTEA bytes in PG's hex output format: a `\x`
/// prefix followed by two lowercase hex digits per byte. Used by the
/// Text-side round-trip and the wire layer's text-mode encoder.
fn encode_bytea_hex(b: &[u8]) -> alloc::string::String {
    let mut hex = alloc::string::String::with_capacity(b.len() * 2 + 2);
    hex.push('\\');
    hex.push('x');
    // High nibble first, then low nibble, for every byte in order.
    hex.extend(
        b.iter()
            .flat_map(|&byte| [hex_digit(byte >> 4), hex_digit(byte & 0x0F)]),
    );
    hex
}

/// Lowercase hex digit for a nibble value `0..=15`; any larger input
/// yields `'?'`.
const fn hex_digit(n: u8) -> char {
    const LOWER_HEX: &[u8; 16] = b"0123456789abcdef";
    if n < 16 {
        LOWER_HEX[n as usize] as char
    } else {
        '?'
    }
}
10925
10926const fn column_type_to_data_type(t: ColumnTypeName) -> DataType {
10927    match t {
10928        ColumnTypeName::SmallInt => DataType::SmallInt,
10929        ColumnTypeName::Int => DataType::Int,
10930        ColumnTypeName::BigInt => DataType::BigInt,
10931        ColumnTypeName::Float => DataType::Float,
10932        ColumnTypeName::Text => DataType::Text,
10933        ColumnTypeName::Varchar(n) => DataType::Varchar(n),
10934        ColumnTypeName::Char(n) => DataType::Char(n),
10935        ColumnTypeName::Bool => DataType::Bool,
10936        ColumnTypeName::Vector { dim, encoding } => DataType::Vector {
10937            dim,
10938            encoding: match encoding {
10939                SqlVecEncoding::F32 => VecEncoding::F32,
10940                SqlVecEncoding::Sq8 => VecEncoding::Sq8,
10941                SqlVecEncoding::F16 => VecEncoding::F16,
10942            },
10943        },
10944        ColumnTypeName::Numeric(precision, scale) => DataType::Numeric { precision, scale },
10945        ColumnTypeName::Date => DataType::Date,
10946        ColumnTypeName::Timestamp => DataType::Timestamp,
10947        ColumnTypeName::Timestamptz => DataType::Timestamptz,
10948        ColumnTypeName::Json => DataType::Json,
10949        ColumnTypeName::Jsonb => DataType::Jsonb,
10950        ColumnTypeName::Bytes => DataType::Bytes,
10951        ColumnTypeName::TextArray => DataType::TextArray,
10952        ColumnTypeName::IntArray => DataType::IntArray,
10953        ColumnTypeName::BigIntArray => DataType::BigIntArray,
10954        ColumnTypeName::TsVector => DataType::TsVector,
10955        ColumnTypeName::TsQuery => DataType::TsQuery,
10956    }
10957}
10958
10959/// Convert an INSERT VALUES expression to a storage Value. Supports literal
10960/// expressions, unary-minus over numeric literals, and pgvector-style
10961/// `'[..]'::vector` cast (v1.2). Anything more complex returns `Unsupported`.
10962fn literal_expr_to_value(expr: Expr) -> Result<Value, EngineError> {
10963    match expr {
10964        Expr::Literal(l) => Ok(literal_to_value(l)),
10965        Expr::Cast { expr, target } => {
10966            let inner_value = literal_expr_to_value(*expr)?;
10967            crate::eval::cast_value(inner_value, target).map_err(EngineError::Eval)
10968        }
10969        Expr::Unary {
10970            op: UnOp::Neg,
10971            expr,
10972        } => match *expr {
10973            Expr::Literal(Literal::Integer(n)) => {
10974                // Fold to i32 if it fits, else BigInt. Parser emits Integer(i64)
10975                // — overflow on negate of i64::MIN is the one edge case.
10976                let neg = n.checked_neg().ok_or_else(|| {
10977                    EngineError::Unsupported("integer literal overflow on negation".into())
10978                })?;
10979                Ok(int_value_for(neg))
10980            }
10981            Expr::Literal(Literal::Float(x)) => Ok(Value::Float(-x)),
10982            other => Err(EngineError::Unsupported(alloc::format!(
10983                "unary minus over non-literal expression: {other:?}"
10984            ))),
10985        },
10986        // v7.10.10 — `ARRAY[lit, lit, …]` constructor accepted at
10987        // INSERT-time. Each element must reduce to a Value through
10988        // `literal_expr_to_value`; NULL elements become `None`.
10989        // v7.11.13 — deduce shape from element values: all Int →
10990        // IntArray; any BigInt → BigIntArray (widening); any Text
10991        // → TextArray. Cast targets (`ARRAY[]::INT[]`) flow through
10992        // the outer Cast arm before reaching here and re-coerce.
10993        Expr::Array(items) => {
10994            let mut materialised: alloc::vec::Vec<Value> =
10995                alloc::vec::Vec::with_capacity(items.len());
10996            for elem in items {
10997                materialised.push(literal_expr_to_value(elem)?);
10998            }
10999            Ok(array_literal_widen(materialised))
11000        }
11001        other => Err(EngineError::Unsupported(alloc::format!(
11002            "non-literal INSERT value expression: {other:?}"
11003        ))),
11004    }
11005}
11006
11007fn literal_to_value(l: Literal) -> Value {
11008    match l {
11009        Literal::Integer(n) => int_value_for(n),
11010        Literal::Float(x) => Value::Float(x),
11011        Literal::String(s) => Value::Text(s),
11012        Literal::Bool(b) => Value::Bool(b),
11013        Literal::Null => Value::Null,
11014        Literal::Vector(v) => Value::Vector(v),
11015        Literal::Interval { months, micros, .. } => Value::Interval { months, micros },
11016    }
11017}
11018
11019/// Pick `Int` (`i32`) when the literal fits, else `BigInt`. `INT` vs `BIGINT`
11020/// columns will still enforce the right tag downstream — this is just the
11021/// default we synthesise from an unannotated integer literal.
11022fn int_value_for(n: i64) -> Value {
11023    if let Ok(small) = i32::try_from(n) {
11024        Value::Int(small)
11025    } else {
11026        Value::BigInt(n)
11027    }
11028}
11029
11030/// Widen / narrow `v` to fit `expected`. Numerics permit safe widening
11031/// (`Int → BigInt`, `Int/BigInt → Float`) and best-effort narrowing
11032/// (`BigInt → Int` succeeds only when the value fits in `i32`). Everything
11033/// else returns `TypeMismatch` carrying the column name for caller diagnostics.
11034/// `NULL` is always permitted; the nullability check happens later in storage.
11035#[allow(clippy::too_many_lines)]
11036fn coerce_value(
11037    v: Value,
11038    expected: DataType,
11039    col_name: &str,
11040    position: usize,
11041) -> Result<Value, EngineError> {
11042    if v.is_null() {
11043        return Ok(Value::Null);
11044    }
11045    let actual = v.data_type().expect("non-null");
11046    if actual == expected {
11047        return Ok(v);
11048    }
11049    let coerced = match (v, expected) {
11050        (Value::Int(n), DataType::BigInt) => Some(Value::BigInt(i64::from(n))),
11051        (Value::Int(n), DataType::Float) => Some(Value::Float(f64::from(n))),
11052        (Value::Int(n), DataType::SmallInt) => i16::try_from(n).ok().map(Value::SmallInt),
11053        (Value::Int(n), DataType::Numeric { precision, scale }) => Some(numeric_from_integer(
11054            i128::from(n),
11055            precision,
11056            scale,
11057            col_name,
11058        )?),
11059        (Value::SmallInt(n), DataType::Int) => Some(Value::Int(i32::from(n))),
11060        (Value::SmallInt(n), DataType::BigInt) => Some(Value::BigInt(i64::from(n))),
11061        (Value::SmallInt(n), DataType::Float) => Some(Value::Float(f64::from(n))),
11062        (Value::SmallInt(n), DataType::Numeric { precision, scale }) => Some(numeric_from_integer(
11063            i128::from(n),
11064            precision,
11065            scale,
11066            col_name,
11067        )?),
11068        (Value::BigInt(n), DataType::Int) => i32::try_from(n).ok().map(Value::Int),
11069        (Value::BigInt(n), DataType::SmallInt) => i16::try_from(n).ok().map(Value::SmallInt),
11070        #[allow(clippy::cast_precision_loss)]
11071        (Value::BigInt(n), DataType::Float) => Some(Value::Float(n as f64)),
11072        (Value::BigInt(n), DataType::Numeric { precision, scale }) => Some(numeric_from_integer(
11073            i128::from(n),
11074            precision,
11075            scale,
11076            col_name,
11077        )?),
11078        (Value::Float(x), DataType::Numeric { precision, scale }) => {
11079            Some(numeric_from_float(x, precision, scale, col_name)?)
11080        }
11081        // Text → DATE / TIMESTAMP: parse canonical text forms.
11082        (Value::Text(s), DataType::Date) => {
11083            let d = eval::parse_date_literal(&s).ok_or_else(|| {
11084                EngineError::Eval(EvalError::TypeMismatch {
11085                    detail: alloc::format!("cannot parse {s:?} as DATE for column `{col_name}`"),
11086                })
11087            })?;
11088            Some(Value::Date(d))
11089        }
11090        // v4.9: Text ↔ JSON coercion. No structural validation —
11091        // any text literal is accepted; the responsibility for
11092        // valid JSON lies with the producer.
11093        (Value::Text(s), DataType::Json | DataType::Jsonb) => Some(Value::Json(s)),
11094        (Value::Json(s), DataType::Text) => Some(Value::Text(s)),
11095        // v7.13.3 — mailrs round-7 S10. SPG's storage represents
11096        // both JSON and JSONB on-disk as `Value::Json(String)` —
11097        // they share the underlying text payload. The cast
11098        // `'<text>'::jsonb` produces a Value::Json that needs to
11099        // satisfy a DataType::Jsonb column. Identity coerce in
11100        // both directions so JSON ↔ JSONB assignments work at all
11101        // INSERT / ALTER COLUMN TYPE / DEFAULT contexts.
11102        (Value::Json(s), DataType::Jsonb | DataType::Json) => Some(Value::Json(s)),
11103        // v7.10.4 — Text → BYTEA. Decode PG-style literal forms:
11104        //   - Hex:    `\x48656c6c6f`  (case-insensitive hex pairs)
11105        //   - Escape: `Hello\\000world`  (backslash + octal triples)
11106        //   - Plain:  any string → raw UTF-8 bytes (PG also accepts)
11107        // Errors surface as TypeMismatch so the operator gets a
11108        // clear "this literal isn't a bytea literal" hint.
11109        (Value::Text(s), DataType::Bytes) => {
11110            let bytes = decode_bytea_literal(&s).map_err(|e| {
11111                EngineError::Eval(EvalError::TypeMismatch {
11112                    detail: alloc::format!(
11113                        "cannot parse {s:?} as BYTEA for column `{col_name}`: {e}"
11114                    ),
11115                })
11116            })?;
11117            Some(Value::Bytes(bytes))
11118        }
11119        // v7.10.4 — BYTEA → Text round-trip uses the PG hex
11120        // output (lowercase, `\x` prefix). Important when a
11121        // SELECT pulls a bytea cell through a Text column path.
11122        (Value::Bytes(b), DataType::Text) => Some(Value::Text(encode_bytea_hex(&b))),
11123        // v7.10.11 — Text → TEXT[]. Decode PG's external array
11124        // form `'{a,b,NULL}'`. NULL element token (case-insensitive)
11125        // is the literal `NULL`; everything else is a quoted or
11126        // unquoted text element. mailrs `'{label1,label2}'::TEXT[]`.
11127        (Value::Text(s), DataType::TextArray) => {
11128            let arr = decode_text_array_literal(&s).map_err(|e| {
11129                EngineError::Eval(EvalError::TypeMismatch {
11130                    detail: alloc::format!(
11131                        "cannot parse {s:?} as TEXT[] for column `{col_name}`: {e}"
11132                    ),
11133                })
11134            })?;
11135            Some(Value::TextArray(arr))
11136        }
11137        // v7.10.11 — TEXT[] → Text round-trip uses PG's
11138        // external array form (`{a,b,NULL}`). Lets a SELECT
11139        // pull an array column through any Text-side codepath.
11140        (Value::TextArray(items), DataType::Text) => Some(Value::Text(encode_text_array(&items))),
11141        (Value::Text(s), DataType::Timestamp | DataType::Timestamptz) => {
11142            let t = eval::parse_timestamp_literal(&s).ok_or_else(|| {
11143                EngineError::Eval(EvalError::TypeMismatch {
11144                    detail: alloc::format!(
11145                        "cannot parse {s:?} as TIMESTAMP for column `{col_name}`"
11146                    ),
11147                })
11148            })?;
11149            Some(Value::Timestamp(t))
11150        }
11151        // DATE ↔ TIMESTAMP convertibility (DATE → midnight,
11152        // TIMESTAMP → day truncation).
11153        (Value::Date(d), DataType::Timestamp | DataType::Timestamptz) => {
11154            Some(Value::Timestamp(i64::from(d) * 86_400_000_000))
11155        }
11156        // v7.9.21 — Value::Timestamp lands in either Timestamp
11157        // or Timestamptz columns; the on-disk layout is the
11158        // same i64 microseconds UTC.
11159        (Value::Timestamp(t), DataType::Timestamptz) => Some(Value::Timestamp(t)),
11160        (Value::Timestamp(t), DataType::Date) => {
11161            let days = t.div_euclid(86_400_000_000);
11162            i32::try_from(days).ok().map(Value::Date)
11163        }
11164        (
11165            Value::Numeric {
11166                scaled,
11167                scale: src_scale,
11168            },
11169            DataType::Numeric { precision, scale },
11170        ) => Some(numeric_rescale(
11171            scaled, src_scale, precision, scale, col_name,
11172        )?),
11173        #[allow(clippy::cast_precision_loss)]
11174        (Value::Numeric { scaled, scale }, DataType::Float) => {
11175            let mut div = 1.0_f64;
11176            for _ in 0..scale {
11177                div *= 10.0;
11178            }
11179            Some(Value::Float((scaled as f64) / div))
11180        }
11181        (Value::Numeric { scaled, scale }, DataType::Int) => {
11182            let truncated = numeric_truncate_to_integer(scaled, scale);
11183            i32::try_from(truncated).ok().map(Value::Int)
11184        }
11185        (Value::Numeric { scaled, scale }, DataType::BigInt) => {
11186            let truncated = numeric_truncate_to_integer(scaled, scale);
11187            i64::try_from(truncated).ok().map(Value::BigInt)
11188        }
11189        (Value::Numeric { scaled, scale }, DataType::SmallInt) => {
11190            let truncated = numeric_truncate_to_integer(scaled, scale);
11191            i16::try_from(truncated).ok().map(Value::SmallInt)
11192        }
11193        // VARCHAR(n) enforces an upper bound on character count.
11194        (Value::Text(s), DataType::Varchar(max)) => {
11195            if u32::try_from(s.chars().count()).unwrap_or(u32::MAX) <= max {
11196                Some(Value::Text(s))
11197            } else {
11198                return Err(EngineError::Unsupported(alloc::format!(
11199                    "value for VARCHAR({max}) column `{col_name}` exceeds length: \
11200                     {} chars",
11201                    s.chars().count()
11202                )));
11203            }
11204        }
11205        // v6.0.1: f32 → SQ8 INSERT-time quantisation. Triggered
11206        // when the column declares `VECTOR(N) USING SQ8` and
11207        // the INSERT VALUES expression yields a raw f32 vector
11208        // (the normal pgvector-shape literal). Dim mismatch
11209        // falls through the `_ => None` arm and surfaces as
11210        // `TypeMismatch` with the expected SQ8 column type —
11211        // matching the F32 path's existing error.
11212        (
11213            Value::Vector(v),
11214            DataType::Vector {
11215                dim,
11216                encoding: VecEncoding::Sq8,
11217            },
11218        ) if v.len() == dim as usize => Some(Value::Sq8Vector(spg_storage::quantize::quantize(&v))),
11219        // v6.0.3: f32 → f16 INSERT-time conversion for HALF
11220        // columns. Bit-exact at the storage layer (modulo
11221        // half-precision rounding); no rerank pass needed at
11222        // search time.
11223        (
11224            Value::Vector(v),
11225            DataType::Vector {
11226                dim,
11227                encoding: VecEncoding::F16,
11228            },
11229        ) if v.len() == dim as usize => Some(Value::HalfVector(
11230            spg_storage::halfvec::HalfVector::from_f32_slice(&v),
11231        )),
11232        // CHAR(n) right-pads with U+0020 to exactly n chars; if the input
11233        // is already longer we reject (PG truncates trailing-space-only;
11234        // staying strict for v1).
11235        (Value::Text(s), DataType::Char(size)) => {
11236            let len = u32::try_from(s.chars().count()).unwrap_or(u32::MAX);
11237            if len > size {
11238                return Err(EngineError::Unsupported(alloc::format!(
11239                    "value for CHAR({size}) column `{col_name}` exceeds length: \
11240                     {len} chars"
11241                )));
11242            }
11243            let need = (size - len) as usize;
11244            let mut padded = s;
11245            padded.reserve(need);
11246            for _ in 0..need {
11247                padded.push(' ');
11248            }
11249            Some(Value::Text(padded))
11250        }
11251        _ => None,
11252    };
11253    coerced.ok_or(EngineError::Storage(StorageError::TypeMismatch {
11254        column: col_name.into(),
11255        expected,
11256        actual,
11257        position,
11258    }))
11259}
11260
11261/// v7.12.4 — render a function arg list into the
11262/// canonical form the storage layer caches as
11263/// [`spg_storage::FunctionDef::args_repr`]. The catalogue uses
11264/// this string for both display + as a coarse signature key
11265/// for the (deferred) overload resolution v7.12.5+ adds.
11266fn render_function_args(args: &[spg_sql::ast::FunctionArg]) -> alloc::string::String {
11267    use core::fmt::Write;
11268    let mut out = alloc::string::String::from("(");
11269    for (i, a) in args.iter().enumerate() {
11270        if i > 0 {
11271            out.push_str(", ");
11272        }
11273        match a.mode {
11274            spg_sql::ast::FunctionArgMode::In => {}
11275            spg_sql::ast::FunctionArgMode::Out => out.push_str("OUT "),
11276            spg_sql::ast::FunctionArgMode::InOut => out.push_str("INOUT "),
11277        }
11278        if let Some(n) = &a.name {
11279            out.push_str(n);
11280            out.push(' ');
11281        }
11282        match &a.ty {
11283            spg_sql::ast::FunctionArgType::Typed(t) => {
11284                let _ = write!(out, "{t}");
11285            }
11286            spg_sql::ast::FunctionArgType::Raw(s) => out.push_str(s),
11287        }
11288    }
11289    out.push(')');
11290    out
11291}
11292
11293#[cfg(test)]
11294mod tests {
11295    use super::*;
11296    use alloc::vec;
11297
11298    fn unwrap_command_ok(r: &QueryResult) -> usize {
11299        match r {
11300            QueryResult::CommandOk { affected, .. } => *affected,
11301            QueryResult::Rows { .. } => panic!("expected CommandOk, got Rows"),
11302        }
11303    }
11304
11305    #[test]
11306    fn create_table_registers_schema() {
11307        let mut e = Engine::new();
11308        e.execute("CREATE TABLE foo (a INT NOT NULL, b TEXT)")
11309            .unwrap();
11310        assert_eq!(e.catalog().table_count(), 1);
11311        let t = e.catalog().get("foo").unwrap();
11312        assert_eq!(t.schema().columns.len(), 2);
11313        assert_eq!(t.schema().columns[0].ty, DataType::Int);
11314        assert!(!t.schema().columns[0].nullable);
11315        assert_eq!(t.schema().columns[1].ty, DataType::Text);
11316    }
11317
11318    #[test]
11319    fn create_table_vector_default_is_f32_encoded() {
11320        let mut e = Engine::new();
11321        e.execute("CREATE TABLE t (v VECTOR(8))").unwrap();
11322        let t = e.catalog().get("t").unwrap();
11323        assert_eq!(
11324            t.schema().columns[0].ty,
11325            DataType::Vector {
11326                dim: 8,
11327                encoding: VecEncoding::F32,
11328            },
11329        );
11330    }
11331
11332    #[test]
11333    fn create_table_vector_using_sq8_succeeds() {
11334        // v6.0.1 step 3: the step-1 fence in `column_def_to_schema`
11335        // is lifted. CREATE TABLE persists an SQ8 column type in
11336        // the catalog; INSERT (next test) quantises raw f32 input.
11337        let mut e = Engine::new();
11338        e.execute("CREATE TABLE t (v VECTOR(8) USING SQ8)").unwrap();
11339        let t = e.catalog().get("t").unwrap();
11340        assert_eq!(
11341            t.schema().columns[0].ty,
11342            DataType::Vector {
11343                dim: 8,
11344                encoding: VecEncoding::Sq8,
11345            },
11346        );
11347    }
11348
11349    #[test]
11350    fn insert_into_sq8_column_quantises_f32_payload() {
11351        // v6.0.1 step 3: INSERT-time `coerce_value` rewrites a raw
11352        // `Value::Vector(Vec<f32>)` literal into the column's
11353        // quantised representation. The row that lands in the
11354        // catalog must therefore hold a `Value::Sq8Vector`, not the
11355        // original f32 buffer — that's the bit that delivers the
11356        // 4× compression target.
11357        let mut e = Engine::new();
11358        e.execute("CREATE TABLE t (v VECTOR(4) USING SQ8)").unwrap();
11359        e.execute("INSERT INTO t VALUES ([0.0, 0.25, 0.5, 1.0])")
11360            .unwrap();
11361        let t = e.catalog().get("t").unwrap();
11362        assert_eq!(t.rows().len(), 1);
11363        match &t.rows()[0].values[0] {
11364            Value::Sq8Vector(q) => {
11365                assert_eq!(q.bytes.len(), 4);
11366                // min/max are derived from the payload: min=0.0, max=1.0.
11367                assert!((q.min - 0.0).abs() < 1e-6);
11368                assert!((q.max - 1.0).abs() < 1e-6);
11369            }
11370            other => panic!("expected Sq8Vector cell, got {other:?}"),
11371        }
11372    }
11373
11374    #[test]
11375    fn create_table_vector_using_half_succeeds_and_insert_converts_to_f16() {
11376        // v6.0.3: CREATE TABLE accepts USING HALF; INSERT path
11377        // converts the incoming `Value::Vector(Vec<f32>)` cell
11378        // into `Value::HalfVector(HalfVector)` via the new
11379        // `coerce_value` arm. The dequantised round-trip is
11380        // bit-exact for f16-representable values, so 0.0 / 0.25
11381        // / 0.5 / 1.0 hit their grid points exactly.
11382        let mut e = Engine::new();
11383        e.execute("CREATE TABLE t (v VECTOR(4) USING HALF)")
11384            .unwrap();
11385        e.execute("INSERT INTO t VALUES ([0.0, 0.25, 0.5, 1.0])")
11386            .unwrap();
11387        let t = e.catalog().get("t").unwrap();
11388        assert_eq!(t.rows().len(), 1);
11389        match &t.rows()[0].values[0] {
11390            Value::HalfVector(h) => {
11391                assert_eq!(h.dim(), 4);
11392                let back = h.to_f32_vec();
11393                let expected = alloc::vec![0.0_f32, 0.25, 0.5, 1.0];
11394                for (g, e) in back.iter().zip(expected.iter()) {
11395                    assert!(
11396                        (g - e).abs() < 1e-6,
11397                        "{g} vs {e} should be exact on f16 grid"
11398                    );
11399                }
11400            }
11401            other => panic!("expected HalfVector cell, got {other:?}"),
11402        }
11403    }
11404
11405    #[test]
11406    fn alter_index_rebuild_in_place_succeeds() {
11407        // v6.0.4: bare REBUILD (no encoding switch) walks every
11408        // row again to rebuild the NSW graph. Verifies the engine
11409        // dispatch + storage helper plumbing without changing any
11410        // cell encoding.
11411        let mut e = Engine::new();
11412        e.execute("CREATE TABLE t (id INT NOT NULL, v VECTOR(3) NOT NULL)")
11413            .unwrap();
11414        for i in 0..8_i32 {
11415            #[allow(clippy::cast_precision_loss)]
11416            let base = (i as f32) * 0.1;
11417            e.execute(&alloc::format!(
11418                "INSERT INTO t VALUES ({i}, [{base}, {b1}, {b2}])",
11419                b1 = base + 0.01,
11420                b2 = base + 0.02,
11421            ))
11422            .unwrap();
11423        }
11424        e.execute("CREATE INDEX t_idx ON t USING hnsw (v)").unwrap();
11425        e.execute("ALTER INDEX t_idx REBUILD").unwrap();
11426        // Schema encoding stays F32 (no encoding clause).
11427        assert_eq!(
11428            e.catalog().get("t").unwrap().schema().columns[1].ty,
11429            DataType::Vector {
11430                dim: 3,
11431                encoding: VecEncoding::F32,
11432            },
11433        );
11434    }
11435
11436    #[test]
11437    fn alter_index_rebuild_with_encoding_switches_cell_type() {
11438        // v6.0.4: REBUILD WITH (encoding = SQ8) recodes every
11439        // stored cell from F32 → SQ8 + rebuilds the graph atop the
11440        // new encoding. Post-rebuild, cells must be Sq8Vector and
11441        // the schema must report encoding = Sq8.
11442        let mut e = Engine::new();
11443        e.execute("CREATE TABLE t (id INT NOT NULL, v VECTOR(4) NOT NULL)")
11444            .unwrap();
11445        e.execute("INSERT INTO t VALUES (1, [0.0, 0.25, 0.5, 1.0])")
11446            .unwrap();
11447        e.execute("CREATE INDEX t_idx ON t USING hnsw (v)").unwrap();
11448        e.execute("ALTER INDEX t_idx REBUILD WITH (encoding = SQ8)")
11449            .unwrap();
11450        let t = e.catalog().get("t").unwrap();
11451        assert_eq!(
11452            t.schema().columns[1].ty,
11453            DataType::Vector {
11454                dim: 4,
11455                encoding: VecEncoding::Sq8,
11456            },
11457        );
11458        assert!(matches!(t.rows()[0].values[1], Value::Sq8Vector(_)));
11459    }
11460
11461    #[test]
11462    fn alter_index_rebuild_unknown_index_errors() {
11463        let mut e = Engine::new();
11464        let err = e.execute("ALTER INDEX nope REBUILD").unwrap_err();
11465        assert!(
11466            matches!(
11467                &err,
11468                EngineError::Storage(StorageError::IndexNotFound { name }) if name == "nope"
11469            ),
11470            "got: {err}"
11471        );
11472    }
11473
11474    #[test]
11475    fn alter_index_rebuild_on_btree_index_errors() {
11476        // REBUILD on a B-tree index has no semantic meaning in
11477        // v6.0.4 — rejected at the storage layer with `Unsupported`.
11478        let mut e = Engine::new();
11479        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
11480        e.execute("INSERT INTO t VALUES (1)").unwrap();
11481        e.execute("CREATE INDEX t_idx ON t (id)").unwrap();
11482        let err = e.execute("ALTER INDEX t_idx REBUILD").unwrap_err();
11483        assert!(
11484            matches!(&err, EngineError::Storage(StorageError::Unsupported(_))),
11485            "got: {err}"
11486        );
11487    }
11488
11489    #[test]
11490    fn prepared_insert_substitutes_placeholders() {
11491        // v6.1.1: prepare() parses once; execute_prepared() walks the
11492        // AST and replaces $1/$2 with the param Values BEFORE the
11493        // dispatch sees them. Same logical result as a simple-query
11494        // INSERT, but parse happens once per *statement*, not per
11495        // execution.
11496        let mut e = Engine::new();
11497        e.execute("CREATE TABLE t (id INT NOT NULL, name TEXT NOT NULL)")
11498            .unwrap();
11499        let stmt = e.prepare("INSERT INTO t VALUES ($1, $2)").unwrap();
11500        for (id, name) in [(1, "alice"), (2, "bob"), (3, "carol")] {
11501            e.execute_prepared(stmt.clone(), &[Value::Int(id), Value::Text(name.into())])
11502                .unwrap();
11503        }
11504        // Read back via simple-query SELECT.
11505        let rows_result = e.execute("SELECT id, name FROM t").unwrap();
11506        let QueryResult::Rows { rows, .. } = rows_result else {
11507            panic!("expected Rows")
11508        };
11509        assert_eq!(rows.len(), 3);
11510    }
11511
11512    #[test]
11513    fn prepared_select_with_placeholder_filters_rows() {
11514        let mut e = Engine::new();
11515        e.execute("CREATE TABLE t (id INT NOT NULL, v INT NOT NULL)")
11516            .unwrap();
11517        for i in 0..10_i32 {
11518            e.execute(&alloc::format!("INSERT INTO t VALUES ({i}, {})", i * 7))
11519                .unwrap();
11520        }
11521        let stmt = e.prepare("SELECT id FROM t WHERE v = $1").unwrap();
11522        let QueryResult::Rows { rows, .. } = e.execute_prepared(stmt, &[Value::Int(35)]).unwrap()
11523        else {
11524            panic!("expected Rows")
11525        };
11526        // v = 35 means i*7 = 35 → i = 5.
11527        assert_eq!(rows.len(), 1);
11528        assert_eq!(rows[0].values[0], Value::Int(5));
11529    }
11530
11531    #[test]
11532    fn prepared_too_few_params_errors() {
11533        let mut e = Engine::new();
11534        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
11535        let stmt = e.prepare("INSERT INTO t VALUES ($1)").unwrap();
11536        let err = e.execute_prepared(stmt, &[]).unwrap_err();
11537        assert!(
11538            matches!(
11539                &err,
11540                EngineError::Eval(EvalError::PlaceholderOutOfRange { n: 1, bound: 0 })
11541            ),
11542            "got: {err}"
11543        );
11544    }
11545
11546    #[test]
11547    fn insert_into_half_column_dim_mismatch_errors() {
11548        let mut e = Engine::new();
11549        e.execute("CREATE TABLE t (v VECTOR(4) USING HALF)")
11550            .unwrap();
11551        let err = e.execute("INSERT INTO t VALUES ([1.0, 2.0])").unwrap_err();
11552        assert!(matches!(
11553            &err,
11554            EngineError::Storage(StorageError::TypeMismatch { .. })
11555        ));
11556    }
11557
11558    #[test]
11559    fn insert_into_sq8_column_dim_mismatch_errors() {
11560        // Dim mismatch falls through the `coerce_value` Vector→Sq8
11561        // arm's guard and surfaces as `TypeMismatch` — the same
11562        // error the F32 path produces today, so client error
11563        // handling stays uniform across encodings.
11564        let mut e = Engine::new();
11565        e.execute("CREATE TABLE t (v VECTOR(4) USING SQ8)").unwrap();
11566        let err = e.execute("INSERT INTO t VALUES ([1.0, 2.0])").unwrap_err();
11567        assert!(
11568            matches!(
11569                &err,
11570                EngineError::Storage(StorageError::TypeMismatch { .. })
11571            ),
11572            "got: {err}",
11573        );
11574    }
11575
11576    #[test]
11577    fn create_table_duplicate_errors() {
11578        let mut e = Engine::new();
11579        e.execute("CREATE TABLE foo (a INT)").unwrap();
11580        let err = e.execute("CREATE TABLE foo (a INT)").unwrap_err();
11581        assert!(matches!(
11582            err,
11583            EngineError::Storage(StorageError::DuplicateTable { ref name }) if name == "foo"
11584        ));
11585    }
11586
11587    #[test]
11588    fn insert_into_unknown_table_errors() {
11589        let mut e = Engine::new();
11590        let err = e.execute("INSERT INTO ghost VALUES (1)").unwrap_err();
11591        assert!(matches!(
11592            err,
11593            EngineError::Storage(StorageError::TableNotFound { ref name }) if name == "ghost"
11594        ));
11595    }
11596
11597    #[test]
11598    fn insert_happy_path_reports_one_affected() {
11599        let mut e = Engine::new();
11600        e.execute("CREATE TABLE foo (a INT NOT NULL)").unwrap();
11601        let r = e.execute("INSERT INTO foo VALUES (42)").unwrap();
11602        assert_eq!(unwrap_command_ok(&r), 1);
11603        assert_eq!(e.catalog().get("foo").unwrap().row_count(), 1);
11604    }
11605
11606    #[test]
11607    fn insert_arity_mismatch_propagates() {
11608        let mut e = Engine::new();
11609        e.execute("CREATE TABLE foo (a INT, b TEXT)").unwrap();
11610        let err = e.execute("INSERT INTO foo VALUES (1)").unwrap_err();
11611        assert!(matches!(
11612            err,
11613            EngineError::Storage(StorageError::ArityMismatch { .. })
11614        ));
11615    }
11616
11617    #[test]
11618    fn insert_negative_integer_via_unary_minus() {
11619        let mut e = Engine::new();
11620        e.execute("CREATE TABLE foo (a INT NOT NULL)").unwrap();
11621        e.execute("INSERT INTO foo VALUES (-7)").unwrap();
11622        let rows = e.catalog().get("foo").unwrap().rows();
11623        assert_eq!(rows[0].values[0], Value::Int(-7));
11624    }
11625
11626    #[test]
11627    fn insert_non_literal_expr_unsupported() {
11628        let mut e = Engine::new();
11629        e.execute("CREATE TABLE foo (a INT NOT NULL)").unwrap();
11630        let err = e.execute("INSERT INTO foo VALUES (1 + 2)").unwrap_err();
11631        assert!(matches!(err, EngineError::Unsupported(_)));
11632    }
11633
11634    #[test]
11635    fn select_star_returns_all_rows_in_insertion_order() {
11636        let mut e = Engine::new();
11637        e.execute("CREATE TABLE foo (a INT NOT NULL, b TEXT NOT NULL)")
11638            .unwrap();
11639        e.execute("INSERT INTO foo VALUES (1, 'one')").unwrap();
11640        e.execute("INSERT INTO foo VALUES (2, 'two')").unwrap();
11641        e.execute("INSERT INTO foo VALUES (3, 'three')").unwrap();
11642
11643        let r = e.execute("SELECT * FROM foo").unwrap();
11644        let QueryResult::Rows { columns, rows } = r else {
11645            panic!("expected Rows")
11646        };
11647        assert_eq!(columns.len(), 2);
11648        assert_eq!(columns[0].name, "a");
11649        assert_eq!(rows.len(), 3);
11650        assert_eq!(
11651            rows[1].values,
11652            vec![Value::Int(2), Value::Text("two".into())]
11653        );
11654    }
11655
11656    #[test]
11657    fn select_star_on_empty_table_returns_zero_rows() {
11658        let mut e = Engine::new();
11659        e.execute("CREATE TABLE foo (a INT)").unwrap();
11660        let r = e.execute("SELECT * FROM foo").unwrap();
11661        match r {
11662            QueryResult::Rows { rows, .. } => assert!(rows.is_empty()),
11663            QueryResult::CommandOk { .. } => panic!("expected Rows"),
11664        }
11665    }
11666
11667    // --- v0.4: WHERE + projection ------------------------------------------
11668
11669    fn make_three_row_users(e: &mut Engine) {
11670        e.execute("CREATE TABLE users (id INT NOT NULL, name TEXT NOT NULL, score INT)")
11671            .unwrap();
11672        e.execute("INSERT INTO users VALUES (1, 'alice', 90)")
11673            .unwrap();
11674        e.execute("INSERT INTO users VALUES (2, 'bob', NULL)")
11675            .unwrap();
11676        e.execute("INSERT INTO users VALUES (3, 'cara', 70)")
11677            .unwrap();
11678    }
11679
11680    fn unwrap_rows(r: QueryResult) -> (Vec<ColumnSchema>, Vec<Row>) {
11681        match r {
11682            QueryResult::Rows { columns, rows } => (columns, rows),
11683            QueryResult::CommandOk { .. } => panic!("expected Rows"),
11684        }
11685    }
11686
11687    #[test]
11688    fn where_filter_passes_only_true_rows() {
11689        let mut e = Engine::new();
11690        make_three_row_users(&mut e);
11691        let r = e.execute("SELECT * FROM users WHERE id > 1").unwrap();
11692        let (_, rows) = unwrap_rows(r);
11693        assert_eq!(rows.len(), 2);
11694        assert_eq!(rows[0].values[0], Value::Int(2));
11695        assert_eq!(rows[1].values[0], Value::Int(3));
11696    }
11697
11698    #[test]
11699    fn where_with_null_result_filters_out_row() {
11700        let mut e = Engine::new();
11701        make_three_row_users(&mut e);
11702        // score is NULL for bob → score > 80 is NULL → row excluded
11703        let r = e.execute("SELECT * FROM users WHERE score > 80").unwrap();
11704        let (_, rows) = unwrap_rows(r);
11705        assert_eq!(rows.len(), 1);
11706        assert_eq!(rows[0].values[1], Value::Text("alice".into()));
11707    }
11708
11709    #[test]
11710    fn projection_named_columns() {
11711        let mut e = Engine::new();
11712        make_three_row_users(&mut e);
11713        let r = e.execute("SELECT name, score FROM users").unwrap();
11714        let (cols, rows) = unwrap_rows(r);
11715        assert_eq!(cols.len(), 2);
11716        assert_eq!(cols[0].name, "name");
11717        assert_eq!(cols[1].name, "score");
11718        assert_eq!(rows.len(), 3);
11719        assert_eq!(
11720            rows[0].values,
11721            vec![Value::Text("alice".into()), Value::Int(90)]
11722        );
11723    }
11724
11725    #[test]
11726    fn projection_with_column_alias() {
11727        let mut e = Engine::new();
11728        make_three_row_users(&mut e);
11729        let r = e
11730            .execute("SELECT name AS who FROM users WHERE id = 1")
11731            .unwrap();
11732        let (cols, rows) = unwrap_rows(r);
11733        assert_eq!(cols[0].name, "who");
11734        assert_eq!(rows.len(), 1);
11735        assert_eq!(rows[0].values[0], Value::Text("alice".into()));
11736    }
11737
11738    #[test]
11739    fn qualified_column_with_table_alias_resolves() {
11740        let mut e = Engine::new();
11741        make_three_row_users(&mut e);
11742        let r = e
11743            .execute("SELECT u.id, u.name FROM users AS u WHERE u.id < 3")
11744            .unwrap();
11745        let (cols, rows) = unwrap_rows(r);
11746        assert_eq!(cols.len(), 2);
11747        assert_eq!(rows.len(), 2);
11748    }
11749
11750    #[test]
11751    fn qualified_column_with_wrong_alias_errors() {
11752        let mut e = Engine::new();
11753        make_three_row_users(&mut e);
11754        let err = e.execute("SELECT x.id FROM users AS u").unwrap_err();
11755        assert!(matches!(
11756            err,
11757            EngineError::Eval(EvalError::UnknownQualifier { ref qualifier }) if qualifier == "x"
11758        ));
11759    }
11760
11761    #[test]
11762    fn select_unknown_column_errors_in_projection() {
11763        let mut e = Engine::new();
11764        make_three_row_users(&mut e);
11765        let err = e.execute("SELECT ghost FROM users").unwrap_err();
11766        assert!(matches!(
11767            err,
11768            EngineError::Eval(EvalError::ColumnNotFound { ref name }) if name == "ghost"
11769        ));
11770    }
11771
11772    #[test]
11773    fn where_unknown_column_errors() {
11774        let mut e = Engine::new();
11775        make_three_row_users(&mut e);
11776        let err = e
11777            .execute("SELECT * FROM users WHERE ghost = 1")
11778            .unwrap_err();
11779        assert!(matches!(
11780            err,
11781            EngineError::Eval(EvalError::ColumnNotFound { .. })
11782        ));
11783    }
11784
11785    #[test]
11786    fn expression_projection_evaluates_and_renders() {
11787        // Compound expressions in the SELECT list are evaluated per row;
11788        // the output column is typed TEXT, name defaults to the expression.
11789        let mut e = Engine::new();
11790        e.execute("CREATE TABLE t (a INT NOT NULL)").unwrap();
11791        e.execute("INSERT INTO t VALUES (3)").unwrap();
11792        let (_, rows) = unwrap_rows(e.execute("SELECT 1 + 2 FROM t").unwrap());
11793        assert_eq!(rows.len(), 1);
11794        // The expression evaluates to integer 3; rendered as the cell value
11795        // (storage::Value::Int(3) since arithmetic kept ints).
11796        assert_eq!(rows[0].values[0], Value::Int(3));
11797    }
11798
11799    #[test]
11800    fn select_unknown_table_errors() {
11801        let mut e = Engine::new();
11802        let err = e.execute("SELECT * FROM ghost").unwrap_err();
11803        assert!(matches!(
11804            err,
11805            EngineError::Storage(StorageError::TableNotFound { .. })
11806        ));
11807    }
11808
11809    #[test]
11810    fn invalid_sql_returns_parse_error() {
11811        // v4.4: UPDATE is now real SQL, so use a true syntactic
11812        // garbage payload for the parse-error path.
11813        let mut e = Engine::new();
11814        let err = e.execute("THIS_IS_NOT_A_KEYWORD foo bar baz").unwrap_err();
11815        assert!(matches!(err, EngineError::Parse(_)));
11816    }
11817
11818    // --- v0.8 CREATE INDEX + index seek ------------------------------------
11819
11820    #[test]
11821    fn create_index_registers_on_table() {
11822        let mut e = Engine::new();
11823        make_three_row_users(&mut e);
11824        e.execute("CREATE INDEX by_name ON users (name)").unwrap();
11825        let t = e.catalog().get("users").unwrap();
11826        assert_eq!(t.indices().len(), 1);
11827        assert_eq!(t.indices()[0].name, "by_name");
11828    }
11829
11830    #[test]
11831    fn create_index_on_unknown_table_errors() {
11832        let mut e = Engine::new();
11833        let err = e.execute("CREATE INDEX i ON ghost (a)").unwrap_err();
11834        assert!(matches!(
11835            err,
11836            EngineError::Storage(StorageError::TableNotFound { .. })
11837        ));
11838    }
11839
11840    #[test]
11841    fn create_index_on_unknown_column_errors() {
11842        let mut e = Engine::new();
11843        make_three_row_users(&mut e);
11844        let err = e.execute("CREATE INDEX i ON users (ghost)").unwrap_err();
11845        assert!(matches!(
11846            err,
11847            EngineError::Storage(StorageError::ColumnNotFound { .. })
11848        ));
11849    }
11850
11851    #[test]
11852    fn select_eq_uses_index_returns_same_rows_as_scan() {
11853        // Build two engines: one with an index, one without. Same query →
11854        // same row set (index is a planner optimisation, not a semantic
11855        // change).
11856        let mut without = Engine::new();
11857        make_three_row_users(&mut without);
11858        let mut with = Engine::new();
11859        make_three_row_users(&mut with);
11860        with.execute("CREATE INDEX by_id ON users (id)").unwrap();
11861
11862        let q = "SELECT * FROM users WHERE id = 2";
11863        let (_, no_idx_rows) = unwrap_rows(without.execute(q).unwrap());
11864        let (_, idx_rows) = unwrap_rows(with.execute(q).unwrap());
11865        assert_eq!(no_idx_rows, idx_rows);
11866        assert_eq!(idx_rows.len(), 1);
11867    }
11868
11869    #[test]
11870    fn select_eq_with_no_matching_index_value_returns_empty() {
11871        let mut e = Engine::new();
11872        make_three_row_users(&mut e);
11873        e.execute("CREATE INDEX by_id ON users (id)").unwrap();
11874        let (_, rows) = unwrap_rows(e.execute("SELECT * FROM users WHERE id = 999").unwrap());
11875        assert_eq!(rows.len(), 0);
11876    }
11877
11878    // --- v0.9 transactions -------------------------------------------------
11879
11880    #[test]
11881    fn begin_sets_in_transaction_flag() {
11882        let mut e = Engine::new();
11883        assert!(!e.in_transaction());
11884        e.execute("BEGIN").unwrap();
11885        assert!(e.in_transaction());
11886    }
11887
11888    #[test]
11889    fn double_begin_errors() {
11890        let mut e = Engine::new();
11891        e.execute("BEGIN").unwrap();
11892        let err = e.execute("BEGIN").unwrap_err();
11893        assert_eq!(err, EngineError::TransactionAlreadyOpen);
11894    }
11895
11896    #[test]
11897    fn commit_without_begin_errors() {
11898        let mut e = Engine::new();
11899        let err = e.execute("COMMIT").unwrap_err();
11900        assert_eq!(err, EngineError::NoActiveTransaction);
11901    }
11902
11903    #[test]
11904    fn rollback_without_begin_errors() {
11905        let mut e = Engine::new();
11906        let err = e.execute("ROLLBACK").unwrap_err();
11907        assert_eq!(err, EngineError::NoActiveTransaction);
11908    }
11909
11910    #[test]
11911    fn commit_applies_shadow_to_committed_catalog() {
11912        let mut e = Engine::new();
11913        e.execute("CREATE TABLE t (v INT NOT NULL)").unwrap();
11914        e.execute("BEGIN").unwrap();
11915        e.execute("INSERT INTO t VALUES (1)").unwrap();
11916        e.execute("INSERT INTO t VALUES (2)").unwrap();
11917        e.execute("COMMIT").unwrap();
11918        assert!(!e.in_transaction());
11919        assert_eq!(e.catalog().get("t").unwrap().row_count(), 2);
11920    }
11921
11922    #[test]
11923    fn rollback_discards_shadow() {
11924        let mut e = Engine::new();
11925        e.execute("CREATE TABLE t (v INT NOT NULL)").unwrap();
11926        e.execute("BEGIN").unwrap();
11927        e.execute("INSERT INTO t VALUES (1)").unwrap();
11928        e.execute("INSERT INTO t VALUES (2)").unwrap();
11929        e.execute("ROLLBACK").unwrap();
11930        assert!(!e.in_transaction());
11931        assert_eq!(e.catalog().get("t").unwrap().row_count(), 0);
11932    }
11933
11934    #[test]
11935    fn select_during_tx_sees_uncommitted_writes_own_session() {
11936        // The shadow catalog is read by SELECTs while a TX is open — the
11937        // session can see its own pending writes.
11938        let mut e = Engine::new();
11939        e.execute("CREATE TABLE t (v INT NOT NULL)").unwrap();
11940        e.execute("BEGIN").unwrap();
11941        e.execute("INSERT INTO t VALUES (42)").unwrap();
11942        let (_, rows) = unwrap_rows(e.execute("SELECT * FROM t").unwrap());
11943        assert_eq!(rows.len(), 1);
11944        assert_eq!(rows[0].values[0], Value::Int(42));
11945    }
11946
11947    #[test]
11948    fn snapshot_with_no_users_is_bare_catalog_format() {
11949        let mut e = Engine::new();
11950        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
11951        let bytes = e.snapshot();
11952        assert_eq!(
11953            &bytes[..8],
11954            b"SPGDB001",
11955            "must be the bare v3.x catalog magic"
11956        );
11957        let e2 = Engine::restore_envelope(&bytes).unwrap();
11958        assert!(e2.users().is_empty());
11959        assert_eq!(e2.catalog().table_count(), 1);
11960    }
11961
11962    #[test]
11963    fn snapshot_with_users_round_trips_both_via_envelope() {
11964        let mut e = Engine::new();
11965        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
11966        e.create_user("alice", "pw1", Role::Admin, [9; 16]).unwrap();
11967        e.create_user("bob", "pw2", Role::ReadOnly, [5; 16])
11968            .unwrap();
11969        let bytes = e.snapshot();
11970        assert_eq!(&bytes[..8], b"SPGENV01", "must be the v4.1 envelope magic");
11971        let e2 = Engine::restore_envelope(&bytes).unwrap();
11972        assert_eq!(e2.users().len(), 2);
11973        assert_eq!(e2.verify_user("alice", "pw1"), Some(Role::Admin));
11974        assert_eq!(e2.verify_user("bob", "pw2"), Some(Role::ReadOnly));
11975        assert_eq!(e2.verify_user("alice", "wrong"), None);
11976        assert_eq!(e2.catalog().table_count(), 1);
11977    }
11978
11979    #[test]
11980    fn ddl_inside_tx_also_rolled_back() {
11981        let mut e = Engine::new();
11982        e.execute("BEGIN").unwrap();
11983        e.execute("CREATE TABLE t (v INT)").unwrap();
11984        // Visible inside the TX.
11985        e.execute("SELECT * FROM t").unwrap();
11986        e.execute("ROLLBACK").unwrap();
11987        // Gone after rollback.
11988        let err = e.execute("SELECT * FROM t").unwrap_err();
11989        assert!(matches!(
11990            err,
11991            EngineError::Storage(StorageError::TableNotFound { .. })
11992        ));
11993    }
11994
11995    // ── v6.1.2: CREATE / DROP PUBLICATION (engine-side) ──────
11996
11997    #[test]
11998    fn create_publication_lands_in_catalog() {
11999        let mut e = Engine::new();
12000        assert!(e.publications().is_empty());
12001        e.execute("CREATE PUBLICATION pub_a").unwrap();
12002        assert_eq!(e.publications().len(), 1);
12003        assert!(e.publications().contains("pub_a"));
12004    }
12005
12006    #[test]
12007    fn create_publication_duplicate_errors() {
12008        let mut e = Engine::new();
12009        e.execute("CREATE PUBLICATION pub_a").unwrap();
12010        let err = e.execute("CREATE PUBLICATION pub_a").unwrap_err();
12011        assert!(
12012            alloc::format!("{err:?}").contains("DuplicateName"),
12013            "got {err:?}"
12014        );
12015    }
12016
12017    #[test]
12018    fn drop_publication_silent_when_absent() {
12019        let mut e = Engine::new();
12020        // PG-compatible: DROP a publication that doesn't exist
12021        // succeeds (no-op) but reports zero affected.
12022        let r = e.execute("DROP PUBLICATION nope").unwrap();
12023        match r {
12024            QueryResult::CommandOk { affected, .. } => assert_eq!(affected, 0),
12025            other => panic!("expected CommandOk, got {other:?}"),
12026        }
12027    }
12028
12029    #[test]
12030    fn drop_publication_present_reports_one_affected() {
12031        let mut e = Engine::new();
12032        e.execute("CREATE PUBLICATION pub_a").unwrap();
12033        let r = e.execute("DROP PUBLICATION pub_a").unwrap();
12034        match r {
12035            QueryResult::CommandOk {
12036                affected,
12037                modified_catalog,
12038            } => {
12039                assert_eq!(affected, 1);
12040                assert!(modified_catalog);
12041            }
12042            other => panic!("expected CommandOk, got {other:?}"),
12043        }
12044        assert!(e.publications().is_empty());
12045    }
12046
12047    #[test]
12048    fn publications_persist_across_snapshot_restore() {
12049        // The persist-across-restart ship-gate at the engine layer —
12050        // snapshot → restore_envelope round trip must preserve the
12051        // publication catalog. The spg-server e2e covers the
12052        // process-restart variant.
12053        let mut e = Engine::new();
12054        e.execute("CREATE PUBLICATION pub_a").unwrap();
12055        e.execute("CREATE PUBLICATION pub_b FOR ALL TABLES")
12056            .unwrap();
12057        let snap = e.snapshot();
12058        let e2 = Engine::restore_envelope(&snap).unwrap();
12059        assert_eq!(e2.publications().len(), 2);
12060        assert!(e2.publications().contains("pub_a"));
12061        assert!(e2.publications().contains("pub_b"));
12062    }
12063
12064    #[test]
12065    fn create_publication_allowed_inside_transaction() {
12066        // v6.1.4 dropped the v6.1.2 in-TX guard — PG allows
12067        // CREATE PUBLICATION inside a TX and the auto-commit
12068        // wrap path needs the same allowance.
12069        let mut e = Engine::new();
12070        e.execute("BEGIN").unwrap();
12071        e.execute("CREATE PUBLICATION pub_a").unwrap();
12072        e.execute("COMMIT").unwrap();
12073        assert!(e.publications().contains("pub_a"));
12074    }
12075
12076    // ── v6.1.3: SHOW PUBLICATIONS + FOR-list variants ───────
12077
12078    #[test]
12079    fn create_publication_for_table_list_lands_with_scope() {
12080        let mut e = Engine::new();
12081        e.execute("CREATE TABLE t1 (id INT NOT NULL)").unwrap();
12082        e.execute("CREATE TABLE t2 (id INT NOT NULL)").unwrap();
12083        e.execute("CREATE PUBLICATION pub_a FOR TABLE t1, t2")
12084            .unwrap();
12085        let scope = e.publications().get("pub_a").cloned();
12086        let Some(spg_sql::ast::PublicationScope::ForTables(ts)) = scope else {
12087            panic!("expected ForTables scope, got {scope:?}")
12088        };
12089        assert_eq!(ts, alloc::vec!["t1".to_string(), "t2".to_string()]);
12090    }
12091
12092    #[test]
12093    fn create_publication_all_tables_except_lands_with_scope() {
12094        let mut e = Engine::new();
12095        e.execute("CREATE PUBLICATION pub_a FOR ALL TABLES EXCEPT t3")
12096            .unwrap();
12097        let scope = e.publications().get("pub_a").cloned();
12098        let Some(spg_sql::ast::PublicationScope::AllTablesExcept(ts)) = scope else {
12099            panic!("expected AllTablesExcept scope, got {scope:?}")
12100        };
12101        assert_eq!(ts, alloc::vec!["t3".to_string()]);
12102    }
12103
12104    #[test]
12105    fn show_publications_empty_returns_zero_rows() {
12106        let e = Engine::new();
12107        let r = e.execute_readonly("SHOW PUBLICATIONS").unwrap();
12108        let QueryResult::Rows { rows, columns } = r else {
12109            panic!()
12110        };
12111        assert!(rows.is_empty());
12112        assert_eq!(columns.len(), 3);
12113        assert_eq!(columns[0].name, "name");
12114        assert_eq!(columns[1].name, "scope");
12115        assert_eq!(columns[2].name, "table_count");
12116    }
12117
12118    #[test]
12119    fn show_publications_returns_one_row_per_publication_ordered_by_name() {
12120        let mut e = Engine::new();
12121        e.execute("CREATE PUBLICATION z_pub").unwrap();
12122        e.execute("CREATE PUBLICATION a_pub FOR TABLE t1, t2")
12123            .unwrap();
12124        e.execute("CREATE PUBLICATION m_pub FOR ALL TABLES EXCEPT bad")
12125            .unwrap();
12126        let r = e.execute_readonly("SHOW PUBLICATIONS").unwrap();
12127        let QueryResult::Rows { rows, .. } = r else {
12128            panic!()
12129        };
12130        assert_eq!(rows.len(), 3);
12131        // Alphabetical order: a_pub, m_pub, z_pub.
12132        let names: Vec<&str> = rows
12133            .iter()
12134            .map(|r| {
12135                if let Value::Text(s) = &r.values[0] {
12136                    s.as_str()
12137                } else {
12138                    panic!()
12139                }
12140            })
12141            .collect();
12142        assert_eq!(names, alloc::vec!["a_pub", "m_pub", "z_pub"]);
12143        // Row 0 — a_pub scope summary + table_count = 2.
12144        match &rows[0].values[1] {
12145            Value::Text(s) => assert_eq!(s, "FOR TABLE t1, t2"),
12146            other => panic!("expected Text, got {other:?}"),
12147        }
12148        assert_eq!(rows[0].values[2], Value::Int(2));
12149        // Row 1 — m_pub.
12150        match &rows[1].values[1] {
12151            Value::Text(s) => assert_eq!(s, "FOR ALL TABLES EXCEPT bad"),
12152            other => panic!("expected Text, got {other:?}"),
12153        }
12154        assert_eq!(rows[1].values[2], Value::Int(1));
12155        // Row 2 — z_pub (AllTables → NULL count).
12156        match &rows[2].values[1] {
12157            Value::Text(s) => assert_eq!(s, "FOR ALL TABLES"),
12158            other => panic!("expected Text, got {other:?}"),
12159        }
12160        assert_eq!(rows[2].values[2], Value::Null);
12161    }
12162
12163    #[test]
12164    fn for_list_scopes_persist_across_snapshot() {
12165        // The v6.1.2 envelope-v3 round-trip exercised AllTables;
12166        // v6.1.3 needs the scope-1 / scope-2 tags to survive too.
12167        let mut e = Engine::new();
12168        e.execute("CREATE PUBLICATION p1 FOR TABLE t1, t2").unwrap();
12169        e.execute("CREATE PUBLICATION p2 FOR ALL TABLES EXCEPT bad, worse")
12170            .unwrap();
12171        let snap = e.snapshot();
12172        let e2 = Engine::restore_envelope(&snap).unwrap();
12173        assert_eq!(e2.publications().len(), 2);
12174        let p1 = e2.publications().get("p1").cloned();
12175        let Some(spg_sql::ast::PublicationScope::ForTables(ts)) = p1 else {
12176            panic!("p1 scope lost: {p1:?}")
12177        };
12178        assert_eq!(ts, alloc::vec!["t1".to_string(), "t2".to_string()]);
12179        let p2 = e2.publications().get("p2").cloned();
12180        let Some(spg_sql::ast::PublicationScope::AllTablesExcept(ts)) = p2 else {
12181            panic!("p2 scope lost: {p2:?}")
12182        };
12183        assert_eq!(ts, alloc::vec!["bad".to_string(), "worse".to_string()]);
12184    }
12185
12186    // ── v6.1.4: CREATE / DROP SUBSCRIPTION + SHOW + envelope v4 ─
12187
12188    #[test]
12189    fn create_subscription_lands_in_catalog_with_defaults() {
12190        let mut e = Engine::new();
12191        e.execute(
12192            "CREATE SUBSCRIPTION sub_a CONNECTION 'host=127.0.0.1 port=20002' PUBLICATION pub_a",
12193        )
12194        .unwrap();
12195        let s = e.subscriptions().get("sub_a").cloned().expect("present");
12196        assert_eq!(s.conn_str, "host=127.0.0.1 port=20002");
12197        assert_eq!(s.publications, alloc::vec!["pub_a".to_string()]);
12198        assert!(s.enabled);
12199        assert_eq!(s.last_received_pos, 0);
12200    }
12201
12202    #[test]
12203    fn create_subscription_duplicate_name_errors() {
12204        let mut e = Engine::new();
12205        e.execute("CREATE SUBSCRIPTION s CONNECTION 'host=x' PUBLICATION p")
12206            .unwrap();
12207        let err = e
12208            .execute("CREATE SUBSCRIPTION s CONNECTION 'host=y' PUBLICATION p")
12209            .unwrap_err();
12210        assert!(
12211            alloc::format!("{err:?}").contains("DuplicateName"),
12212            "got {err:?}"
12213        );
12214    }
12215
12216    #[test]
12217    fn drop_subscription_silent_when_absent() {
12218        let mut e = Engine::new();
12219        let r = e.execute("DROP SUBSCRIPTION never").unwrap();
12220        match r {
12221            QueryResult::CommandOk { affected, .. } => assert_eq!(affected, 0),
12222            other => panic!("expected CommandOk, got {other:?}"),
12223        }
12224    }
12225
12226    #[test]
12227    fn subscription_advance_updates_last_pos_monotone() {
12228        let mut e = Engine::new();
12229        e.execute("CREATE SUBSCRIPTION s CONNECTION 'h=x' PUBLICATION p")
12230            .unwrap();
12231        assert!(e.subscription_advance("s", 100));
12232        assert_eq!(e.subscriptions().get("s").unwrap().last_received_pos, 100);
12233        assert!(e.subscription_advance("s", 50)); // stale → ignored
12234        assert_eq!(e.subscriptions().get("s").unwrap().last_received_pos, 100);
12235        assert!(e.subscription_advance("s", 200));
12236        assert_eq!(e.subscriptions().get("s").unwrap().last_received_pos, 200);
12237        assert!(!e.subscription_advance("missing", 1));
12238    }
12239
12240    #[test]
12241    fn show_subscriptions_returns_rows_ordered_by_name() {
12242        let mut e = Engine::new();
12243        e.execute("CREATE SUBSCRIPTION z_sub CONNECTION 'h=x' PUBLICATION p1, p2")
12244            .unwrap();
12245        e.execute("CREATE SUBSCRIPTION a_sub CONNECTION 'h=y' PUBLICATION p3")
12246            .unwrap();
12247        let r = e.execute_readonly("SHOW SUBSCRIPTIONS").unwrap();
12248        let QueryResult::Rows { rows, columns } = r else {
12249            panic!()
12250        };
12251        assert_eq!(rows.len(), 2);
12252        assert_eq!(columns.len(), 5);
12253        assert_eq!(columns[0].name, "name");
12254        assert_eq!(columns[4].name, "last_received_pos");
12255        // Alphabetical: a_sub, z_sub.
12256        let names: Vec<&str> = rows
12257            .iter()
12258            .map(|r| {
12259                if let Value::Text(s) = &r.values[0] {
12260                    s.as_str()
12261                } else {
12262                    panic!()
12263                }
12264            })
12265            .collect();
12266        assert_eq!(names, alloc::vec!["a_sub", "z_sub"]);
12267        // Row 0: a_sub
12268        assert_eq!(rows[0].values[1], Value::Text("h=y".to_string()));
12269        assert_eq!(rows[0].values[2], Value::Text("p3".to_string()));
12270        assert_eq!(rows[0].values[3], Value::Bool(true));
12271        assert_eq!(rows[0].values[4], Value::BigInt(0));
12272        // Row 1: z_sub — publications join with ", "
12273        assert_eq!(rows[1].values[2], Value::Text("p1, p2".to_string()));
12274    }
12275
12276    #[test]
12277    fn subscriptions_persist_across_snapshot_envelope_v4() {
12278        let mut e = Engine::new();
12279        e.execute("CREATE SUBSCRIPTION s1 CONNECTION 'h=A' PUBLICATION p1, p2")
12280            .unwrap();
12281        e.execute("CREATE SUBSCRIPTION s2 CONNECTION 'h=B' PUBLICATION p3")
12282            .unwrap();
12283        e.subscription_advance("s2", 42);
12284        let snap = e.snapshot();
12285        let e2 = Engine::restore_envelope(&snap).unwrap();
12286        assert_eq!(e2.subscriptions().len(), 2);
12287        let s1 = e2.subscriptions().get("s1").unwrap();
12288        assert_eq!(s1.conn_str, "h=A");
12289        assert_eq!(
12290            s1.publications,
12291            alloc::vec!["p1".to_string(), "p2".to_string()]
12292        );
12293        assert_eq!(s1.last_received_pos, 0);
12294        let s2 = e2.subscriptions().get("s2").unwrap();
12295        assert_eq!(s2.last_received_pos, 42);
12296    }
12297
12298    #[test]
12299    fn v3_envelope_loads_with_empty_subscriptions() {
12300        // v3 snapshot (publications-only). Forge it by hand so we
12301        // verify v6.1.4 readers don't panic — they must surface
12302        // empty subscriptions and a populated publication table.
12303        let mut e = Engine::new();
12304        e.execute("CREATE PUBLICATION pub_legacy").unwrap();
12305        let catalog = e.catalog.serialize();
12306        let users = crate::users::serialize_users(&e.users);
12307        let pubs = e.publications.serialize();
12308        let mut buf = Vec::new();
12309        buf.extend_from_slice(b"SPGENV01");
12310        buf.push(3u8); // v3
12311        buf.extend_from_slice(&u32::try_from(catalog.len()).unwrap().to_le_bytes());
12312        buf.extend_from_slice(&catalog);
12313        buf.extend_from_slice(&u32::try_from(users.len()).unwrap().to_le_bytes());
12314        buf.extend_from_slice(&users);
12315        buf.extend_from_slice(&u32::try_from(pubs.len()).unwrap().to_le_bytes());
12316        buf.extend_from_slice(&pubs);
12317        let crc = spg_crypto::crc32::crc32(&buf);
12318        buf.extend_from_slice(&crc.to_le_bytes());
12319
12320        let e2 = Engine::restore_envelope(&buf).expect("v3 envelope restores under v4 reader");
12321        assert!(e2.subscriptions().is_empty());
12322        assert!(e2.publications().contains("pub_legacy"));
12323    }
12324
12325    #[test]
12326    fn create_subscription_allowed_inside_transaction() {
12327        let mut e = Engine::new();
12328        e.execute("BEGIN").unwrap();
12329        e.execute("CREATE SUBSCRIPTION s CONNECTION 'h=x' PUBLICATION p")
12330            .unwrap();
12331        e.execute("COMMIT").unwrap();
12332        assert!(e.subscriptions().contains("s"));
12333    }
12334
12335    // ── v6.2.0: ANALYZE + spg_statistic + envelope v5 ──────────
12336    #[test]
12337    fn analyze_populates_histogram_bounds() {
12338        let mut e = Engine::new();
12339        e.execute("CREATE TABLE t (id INT NOT NULL, name TEXT)")
12340            .unwrap();
12341        for i in 0..50 {
12342            e.execute(&alloc::format!("INSERT INTO t VALUES ({i}, 'name{i}')"))
12343                .unwrap();
12344        }
12345        e.execute("ANALYZE t").unwrap();
12346        let stats = e.statistics();
12347        let id_stats = stats.get("t", "id").unwrap();
12348        assert!(id_stats.histogram_bounds.len() >= 2);
12349        assert_eq!(id_stats.histogram_bounds.first().unwrap(), "0");
12350        assert_eq!(id_stats.histogram_bounds.last().unwrap(), "49");
12351        assert!((id_stats.null_frac - 0.0).abs() < 1e-6);
12352        assert_eq!(id_stats.n_distinct, 50);
12353    }
12354
12355    #[test]
12356    fn reanalyze_overwrites_prior_stats() {
12357        let mut e = Engine::new();
12358        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
12359        for i in 0..10 {
12360            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
12361                .unwrap();
12362        }
12363        e.execute("ANALYZE t").unwrap();
12364        let n1 = e.statistics().get("t", "id").unwrap().n_distinct;
12365        assert_eq!(n1, 10);
12366        for i in 10..30 {
12367            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
12368                .unwrap();
12369        }
12370        e.execute("ANALYZE t").unwrap();
12371        let n2 = e.statistics().get("t", "id").unwrap().n_distinct;
12372        assert_eq!(n2, 30);
12373    }
12374
12375    #[test]
12376    fn analyze_unknown_table_errors() {
12377        let mut e = Engine::new();
12378        let err = e.execute("ANALYZE nonexistent").unwrap_err();
12379        assert!(matches!(
12380            err,
12381            EngineError::Storage(StorageError::TableNotFound { .. })
12382        ));
12383    }
12384
12385    #[test]
12386    fn bare_analyze_covers_all_user_tables() {
12387        let mut e = Engine::new();
12388        e.execute("CREATE TABLE t1 (id INT NOT NULL)").unwrap();
12389        e.execute("CREATE TABLE t2 (name TEXT NOT NULL)").unwrap();
12390        e.execute("INSERT INTO t1 VALUES (1)").unwrap();
12391        e.execute("INSERT INTO t2 VALUES ('alice')").unwrap();
12392        let r = e.execute("ANALYZE").unwrap();
12393        match r {
12394            QueryResult::CommandOk {
12395                affected,
12396                modified_catalog,
12397            } => {
12398                assert_eq!(affected, 2);
12399                assert!(modified_catalog);
12400            }
12401            other => panic!("expected CommandOk, got {other:?}"),
12402        }
12403        assert!(e.statistics().get("t1", "id").is_some());
12404        assert!(e.statistics().get("t2", "name").is_some());
12405    }
12406
12407    #[test]
12408    fn select_from_spg_statistic_returns_rows_per_column() {
12409        let mut e = Engine::new();
12410        e.execute("CREATE TABLE t (id INT NOT NULL, label TEXT)")
12411            .unwrap();
12412        e.execute("INSERT INTO t VALUES (1, 'a')").unwrap();
12413        e.execute("INSERT INTO t VALUES (2, 'b')").unwrap();
12414        e.execute("ANALYZE t").unwrap();
12415        let r = e.execute_readonly("SELECT * FROM spg_statistic").unwrap();
12416        let QueryResult::Rows { rows, columns } = r else {
12417            panic!()
12418        };
12419        // v6.7.0 — spg_statistic gained a `cold_row_count` column.
12420        assert_eq!(columns.len(), 6);
12421        assert_eq!(columns[0].name, "table_name");
12422        assert_eq!(columns[4].name, "histogram_bounds");
12423        assert_eq!(columns[5].name, "cold_row_count");
12424        assert_eq!(rows.len(), 2, "one row per column of t");
12425        // Sorted by (table_name, column_name).
12426        match (&rows[0].values[0], &rows[0].values[1]) {
12427            (Value::Text(t), Value::Text(c)) => {
12428                assert_eq!(t, "t");
12429                // BTreeMap orders (table, column); columns "id" < "label".
12430                assert_eq!(c, "id");
12431            }
12432            _ => panic!(),
12433        }
12434    }
12435
12436    #[test]
12437    fn analyze_skips_vector_columns() {
12438        // Vector columns have their own stats shape (HNSW graph);
12439        // ANALYZE leaves them out of spg_statistic.
12440        let mut e = Engine::new();
12441        e.execute("CREATE TABLE t (id INT NOT NULL, v VECTOR(3) NOT NULL)")
12442            .unwrap();
12443        e.execute("INSERT INTO t VALUES (1, [1, 2, 3])").unwrap();
12444        e.execute("ANALYZE t").unwrap();
12445        assert!(e.statistics().get("t", "id").is_some());
12446        assert!(e.statistics().get("t", "v").is_none());
12447    }
12448
12449    #[test]
12450    fn statistics_persist_across_envelope_v5_round_trip() {
12451        let mut e = Engine::new();
12452        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
12453        for i in 0..20 {
12454            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
12455                .unwrap();
12456        }
12457        e.execute("ANALYZE").unwrap();
12458        let snap = e.snapshot();
12459        let e2 = Engine::restore_envelope(&snap).unwrap();
12460        let s = e2.statistics().get("t", "id").unwrap();
12461        assert_eq!(s.n_distinct, 20);
12462    }
12463
12464    // ── v6.2.1 auto-analyze threshold ───────────────────────────
12465
12466    #[test]
12467    fn auto_analyze_threshold_fires_after_10pct_of_min_rows_on_small_table() {
12468        // For a table with 0 rows then 10 inserts → modified=10,
12469        // row_count=10. Threshold = 0.1 × max(10, 100) = 10. So
12470        // after the 10th INSERT the threshold is met.
12471        let mut e = Engine::new();
12472        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
12473        for i in 0..9 {
12474            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
12475                .unwrap();
12476        }
12477        assert!(e.tables_needing_analyze().is_empty(), "9 < threshold");
12478        e.execute("INSERT INTO t VALUES (9)").unwrap();
12479        let needs = e.tables_needing_analyze();
12480        assert_eq!(needs, alloc::vec!["t".to_string()]);
12481    }
12482
12483    #[test]
12484    fn auto_analyze_threshold_uses_10pct_of_row_count_for_large_tables() {
12485        // After ANALYZE on 1000 rows, threshold = 0.1 × row_count.
12486        // Each new INSERT bumps both modified and row_count, so to
12487        // trigger from N=1000 we need modifications ≥ 0.1 × (1000+M),
12488        // i.e. M ≥ 112. The test inserts 50 (no fire), then 150
12489        // more (200 total mods, row_count=1200, threshold=120 → fire).
12490        let mut e = Engine::new();
12491        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
12492        for i in 0..1000 {
12493            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
12494                .unwrap();
12495        }
12496        e.execute("ANALYZE t").unwrap();
12497        assert!(e.tables_needing_analyze().is_empty(), "fresh ANALYZE");
12498        for i in 1000..1050 {
12499            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
12500                .unwrap();
12501        }
12502        assert!(
12503            e.tables_needing_analyze().is_empty(),
12504            "50 inserts < threshold of ~105"
12505        );
12506        for i in 1050..1200 {
12507            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
12508                .unwrap();
12509        }
12510        assert_eq!(
12511            e.tables_needing_analyze(),
12512            alloc::vec!["t".to_string()],
12513            "200 inserts > 0.1 × 1200 threshold"
12514        );
12515    }
12516
12517    #[test]
12518    fn auto_analyze_threshold_resets_after_analyze() {
12519        let mut e = Engine::new();
12520        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
12521        for i in 0..200 {
12522            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
12523                .unwrap();
12524        }
12525        assert!(!e.tables_needing_analyze().is_empty());
12526        e.execute("ANALYZE").unwrap();
12527        assert!(
12528            e.tables_needing_analyze().is_empty(),
12529            "ANALYZE must reset the counter"
12530        );
12531    }
12532
12533    #[test]
12534    fn auto_analyze_threshold_tracks_updates_and_deletes() {
12535        let mut e = Engine::new();
12536        e.execute("CREATE TABLE t (id INT NOT NULL, label TEXT)")
12537            .unwrap();
12538        for i in 0..50 {
12539            e.execute(&alloc::format!("INSERT INTO t VALUES ({i}, 'x')"))
12540                .unwrap();
12541        }
12542        e.execute("ANALYZE t").unwrap();
12543        // UPDATE 20 rows + DELETE 5 → modified=25. Threshold = 0.1
12544        // × max(50, 100) = 10. So 25 >= 10 → trigger.
12545        e.execute("UPDATE t SET label = 'y' WHERE id < 20").unwrap();
12546        e.execute("DELETE FROM t WHERE id >= 45").unwrap();
12547        assert_eq!(e.tables_needing_analyze(), alloc::vec!["t".to_string()]);
12548    }
12549
12550    #[test]
12551    fn v4_envelope_loads_with_empty_statistics() {
12552        // Forge a v4 envelope by hand: catalog + users + pubs +
12553        // subs trailer, no statistics. A v6.2.0 reader must accept
12554        // it and surface an empty Statistics.
12555        let mut e = Engine::new();
12556        e.create_user("alice", "secret", crate::users::Role::ReadOnly, [0u8; 16])
12557            .unwrap();
12558        let catalog = e.catalog.serialize();
12559        let users = crate::users::serialize_users(&e.users);
12560        let pubs = e.publications.serialize();
12561        let subs = e.subscriptions.serialize();
12562        let mut buf = Vec::new();
12563        buf.extend_from_slice(b"SPGENV01");
12564        buf.push(4u8);
12565        buf.extend_from_slice(&u32::try_from(catalog.len()).unwrap().to_le_bytes());
12566        buf.extend_from_slice(&catalog);
12567        buf.extend_from_slice(&u32::try_from(users.len()).unwrap().to_le_bytes());
12568        buf.extend_from_slice(&users);
12569        buf.extend_from_slice(&u32::try_from(pubs.len()).unwrap().to_le_bytes());
12570        buf.extend_from_slice(&pubs);
12571        buf.extend_from_slice(&u32::try_from(subs.len()).unwrap().to_le_bytes());
12572        buf.extend_from_slice(&subs);
12573        let crc = spg_crypto::crc32::crc32(&buf);
12574        buf.extend_from_slice(&crc.to_le_bytes());
12575        let e2 = Engine::restore_envelope(&buf).expect("v4 envelope restores");
12576        assert!(e2.statistics().is_empty());
12577    }
12578
12579    #[test]
12580    fn v1_v2_envelope_loads_with_empty_publications() {
12581        // A snapshot taken before v6.1.2 (no publication trailer,
12582        // envelope v2) must still deserialise — and the resulting
12583        // engine must report zero publications. Use the engine's own
12584        // round-trip with no publications: that emits v3 but with an
12585        // empty pubs block. Then forge a v2 envelope by hand to lock
12586        // the back-compat path.
12587        let mut e = Engine::new();
12588        // Force users to be non-empty so the snapshot takes the
12589        // envelope path rather than the bare-catalog fallback.
12590        e.create_user("alice", "secret", crate::users::Role::ReadOnly, [0u8; 16])
12591            .unwrap();
12592
12593        // Forge an envelope v2: same shape as v3 but no pubs trailer.
12594        let catalog = e.catalog.serialize();
12595        let users = crate::users::serialize_users(&e.users);
12596        let mut buf = Vec::new();
12597        buf.extend_from_slice(b"SPGENV01");
12598        buf.push(2u8); // v2
12599        buf.extend_from_slice(&u32::try_from(catalog.len()).unwrap().to_le_bytes());
12600        buf.extend_from_slice(&catalog);
12601        buf.extend_from_slice(&u32::try_from(users.len()).unwrap().to_le_bytes());
12602        buf.extend_from_slice(&users);
12603        let crc = spg_crypto::crc32::crc32(&buf);
12604        buf.extend_from_slice(&crc.to_le_bytes());
12605
12606        let e2 = Engine::restore_envelope(&buf).expect("v2 envelope restores");
12607        assert!(e2.publications().is_empty());
12608    }
12609}